# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
!pip install seaborn
import seaborn as sns
!pip install plotly==4.14.3
import plotly.graph_objects as go
import warnings
import datetime as dt
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error , mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
import plotly.express as px
Requirement already satisfied: seaborn in ./opt/anaconda3/lib/python3.9/site-packages (0.11.2) Requirement already satisfied: matplotlib>=2.2 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (3.5.1) Requirement already satisfied: pandas>=0.23 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.4.2) Requirement already satisfied: numpy>=1.15 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.21.5) Requirement already satisfied: scipy>=1.0 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.7.3) Requirement already satisfied: packaging>=20.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (21.3) Requirement already satisfied: pyparsing>=2.2.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (3.0.4) Requirement already satisfied: python-dateutil>=2.7 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (1.3.2) Requirement already satisfied: fonttools>=4.22.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (4.25.0) Requirement already satisfied: cycler>=0.10 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (0.11.0) Requirement already satisfied: pillow>=6.2.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (9.0.1) Requirement already satisfied: pytz>=2020.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas>=0.23->seaborn) (2021.3) Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0) Requirement already satisfied: plotly==4.14.3 in ./opt/anaconda3/lib/python3.9/site-packages (4.14.3) Requirement already satisfied: six in ./opt/anaconda3/lib/python3.9/site-packages (from plotly==4.14.3) (1.16.0) Requirement already satisfied: retrying>=1.3.3 in ./opt/anaconda3/lib/python3.9/site-packages (from plotly==4.14.3) (1.3.3)
# Loading train and test datasets
train_set = pd.read_csv('/Users/Admin/Desktop/store-sales-time-series-forecasting/train.csv')
test_set = pd.read_csv('/Users/Admin/Desktop/store-sales-time-series-forecasting/test.csv')
#Printing a summary of the train dataset to find errors and missing values.
train_set.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3000888 entries, 0 to 3000887 Data columns (total 6 columns): # Column Dtype --- ------ ----- 0 id int64 1 date object 2 store_nbr int64 3 family object 4 sales float64 5 onpromotion int64 dtypes: float64(1), int64(3), object(2) memory usage: 137.4+ MB
# Previewing train data set dataset
train_set.head()
| id | date | store_nbr | family | sales | onpromotion | |
|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.0 | 0 |
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.0 | 0 |
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.0 | 0 |
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.0 | 0 |
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.0 | 0 |
#Printing a summary of the train dataset to find errors and missing values.
test_set.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 28512 entries, 0 to 28511 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 28512 non-null int64 1 date 28512 non-null object 2 store_nbr 28512 non-null int64 3 family 28512 non-null object 4 onpromotion 28512 non-null int64 dtypes: int64(3), object(2) memory usage: 1.1+ MB
#Previewing train data set dataset
test_set.head()
| id | date | store_nbr | family | onpromotion | |
|---|---|---|---|---|---|
| 0 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 |
| 1 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 |
| 2 | 3000890 | 2017-08-16 | 1 | BEAUTY | 2 |
| 3 | 3000891 | 2017-08-16 | 1 | BEVERAGES | 20 |
| 4 | 3000892 | 2017-08-16 | 1 | BOOKS | 0 |
# Loading supplementary datasets
oil = pd.read_csv('/Users/Admin/Desktop/store-sales-time-series-forecasting/oil.csv')
holidays = pd.read_csv('/Users/Admin/Desktop/store-sales-time-series-forecasting/holidays_events.csv')
stores = pd.read_csv('/Users/Admin/Desktop/store-sales-time-series-forecasting/stores.csv')
transactions = pd.read_csv('/Users/Admin/Desktop/store-sales-time-series-forecasting/transactions.csv')
#Previewing oil dataset
oil.head()
| date | dcoilwtico | |
|---|---|---|
| 0 | 2013-01-01 | NaN |
| 1 | 2013-01-02 | 93.14 |
| 2 | 2013-01-03 | 92.97 |
| 3 | 2013-01-04 | 93.12 |
| 4 | 2013-01-07 | 93.20 |
#Printing a summary of the oil dataset to find errors and missing values.
oil.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1218 entries, 0 to 1217 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 1218 non-null object 1 dcoilwtico 1175 non-null float64 dtypes: float64(1), object(1) memory usage: 19.2+ KB
#filling in missing values
oil = oil.ffill().bfill()
oil
| date | dcoilwtico | |
|---|---|---|
| 0 | 2013-01-01 | 93.14 |
| 1 | 2013-01-02 | 93.14 |
| 2 | 2013-01-03 | 92.97 |
| 3 | 2013-01-04 | 93.12 |
| 4 | 2013-01-07 | 93.20 |
| ... | ... | ... |
| 1213 | 2017-08-25 | 47.65 |
| 1214 | 2017-08-28 | 46.40 |
| 1215 | 2017-08-29 | 46.46 |
| 1216 | 2017-08-30 | 45.96 |
| 1217 | 2017-08-31 | 47.26 |
1218 rows × 2 columns
#previewing holidays dataset
holidays.head()
| date | type | locale | locale_name | description | transferred | |
|---|---|---|---|---|---|---|
| 0 | 2012-03-02 | Holiday | Local | Manta | Fundacion de Manta | False |
| 1 | 2012-04-01 | Holiday | Regional | Cotopaxi | Provincializacion de Cotopaxi | False |
| 2 | 2012-04-12 | Holiday | Local | Cuenca | Fundacion de Cuenca | False |
| 3 | 2012-04-14 | Holiday | Local | Libertad | Cantonizacion de Libertad | False |
| 4 | 2012-04-21 | Holiday | Local | Riobamba | Cantonizacion de Riobamba | False |
#Printing a summary of the holidays dataset to find errors and missing values.
holidays.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 350 entries, 0 to 349 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 350 non-null object 1 type 350 non-null object 2 locale 350 non-null object 3 locale_name 350 non-null object 4 description 350 non-null object 5 transferred 350 non-null bool dtypes: bool(1), object(5) memory usage: 14.1+ KB
#Previewing stores data set
stores.head()
| store_nbr | city | state | type | cluster | |
|---|---|---|---|---|---|
| 0 | 1 | Quito | Pichincha | D | 13 |
| 1 | 2 | Quito | Pichincha | D | 13 |
| 2 | 3 | Quito | Pichincha | D | 8 |
| 3 | 4 | Quito | Pichincha | D | 9 |
| 4 | 5 | Santo Domingo | Santo Domingo de los Tsachilas | D | 4 |
#Printing a summary of the stores dataset to find errors and missing values
stores.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 54 entries, 0 to 53 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 store_nbr 54 non-null int64 1 city 54 non-null object 2 state 54 non-null object 3 type 54 non-null object 4 cluster 54 non-null int64 dtypes: int64(2), object(3) memory usage: 2.2+ KB
# Previewing transactions data
transactions.head()
| date | store_nbr | transactions | |
|---|---|---|---|
| 0 | 2013-01-01 | 25 | 770 |
| 1 | 2013-01-02 | 1 | 2111 |
| 2 | 2013-01-02 | 2 | 2358 |
| 3 | 2013-01-02 | 3 | 3487 |
| 4 | 2013-01-02 | 4 | 1922 |
#Printing a summary of the transactions dataset to find errors and missing values
transactions.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 83488 entries, 0 to 83487 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 83488 non-null object 1 store_nbr 83488 non-null int64 2 transactions 83488 non-null int64 dtypes: int64(2), object(1) memory usage: 1.9+ MB
transactions.drop_duplicates(inplace = True)
transactions.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 83488 entries, 0 to 83487 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 83488 non-null object 1 store_nbr 83488 non-null int64 2 transactions 83488 non-null int64 dtypes: int64(2), object(1) memory usage: 2.5+ MB
stores.drop_duplicates(inplace = True)
stores.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 54 entries, 0 to 53 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 store_nbr 54 non-null int64 1 city 54 non-null object 2 state 54 non-null object 3 type 54 non-null object 4 cluster 54 non-null int64 dtypes: int64(2), object(3) memory usage: 2.5+ KB
train_set.store_nbr.unique()
array([ 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 3, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 4,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 5, 50, 51, 52, 53, 54, 6,
7, 8, 9])
test_set.store_nbr.unique()
array([ 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 3, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 4,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 5, 50, 51, 52, 53, 54, 6,
7, 8, 9])
stores.city.unique()
array(['Quito', 'Santo Domingo', 'Cayambe', 'Latacunga', 'Riobamba',
'Ibarra', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil', 'Salinas',
'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad', 'Cuenca',
'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen'],
dtype=object)
stores.city
0 Quito 1 Quito 2 Quito 3 Quito 4 Santo Domingo 5 Quito 6 Quito 7 Quito 8 Quito 9 Quito 10 Cayambe 11 Latacunga 12 Latacunga 13 Riobamba 14 Ibarra 15 Santo Domingo 16 Quito 17 Quito 18 Guaranda 19 Quito 20 Santo Domingo 21 Puyo 22 Ambato 23 Guayaquil 24 Salinas 25 Guayaquil 26 Daule 27 Guayaquil 28 Guayaquil 29 Guayaquil 30 Babahoyo 31 Guayaquil 32 Quevedo 33 Guayaquil 34 Playas 35 Libertad 36 Cuenca 37 Loja 38 Cuenca 39 Machala 40 Machala 41 Cuenca 42 Esmeraldas 43 Quito 44 Quito 45 Quito 46 Quito 47 Quito 48 Quito 49 Ambato 50 Guayaquil 51 Manta 52 Manta 53 El Carmen Name: city, dtype: object
# checking the range of dates
train_set.date.min(), train_set.date.max()
('2013-01-01', '2017-08-15')
# Adding the sales date column to the train dataset
train_set['Sales_date'] = pd.to_datetime(train_set['date']).dt.date
# Checking for unique values in the train data set
train_set.Sales_date.nunique()
1684
#checking the range of dates
train_set['Sales_date'].min(), train_set['Sales_date'].max()
(datetime.date(2013, 1, 1), datetime.date(2017, 8, 15))
# checking completeness of dates
difference = train_set['Sales_date'].max() - train_set['Sales_date'].min()
difference
datetime.timedelta(days=1687)
# Expected dates in dataset
difference.days + 1
1688
#Actual date in dataset
train_set.Sales_date.nunique()
1684
# checking the range of dates
#test_set.date.min(), test_set.date.max()
# Adding the sales date column to the train dataset
#test_set['Sales_date'] = pd.to_datetime(test_set['date']).dt.date
# Checking for unique values in the test data set
#test_set.Sales_date.nunique()
#checking the range of dates
#test_set['Sales_date'].min(), test_set['Sales_date'].max()
# checking completeness of dates
#difference1 = test_set['Sales_date'].max() - test_set['Sales_date'].min()
#difference1
# Expected dates in dataset
#difference1.days + 1
#Actual date in dataset
#test_set.Sales_date.nunique()
# Finding the head and tail of dates
expected_dates = pd.date_range(start= train_set['Sales_date'].min(), end = train_set['Sales_date'].max())
expected_dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
'2013-01-09', '2013-01-10',
...
'2017-08-06', '2017-08-07', '2017-08-08', '2017-08-09',
'2017-08-10', '2017-08-11', '2017-08-12', '2017-08-13',
'2017-08-14', '2017-08-15'],
dtype='datetime64[ns]', length=1688, freq='D')
# Finding the missing dates in both sets
set(expected_dates.date) - set(train_set.Sales_date.unique())
{datetime.date(2013, 12, 25),
datetime.date(2014, 12, 25),
datetime.date(2015, 12, 25),
datetime.date(2016, 12, 25)}
train_set.head(5)
| id | date | store_nbr | family | sales | onpromotion | Sales_date | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.0 | 0 | 2013-01-01 |
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.0 | 0 | 2013-01-01 |
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.0 | 0 | 2013-01-01 |
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.0 | 0 | 2013-01-01 |
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.0 | 0 | 2013-01-01 |
# Importing products from itertools
from itertools import product
missing_dates = set(expected_dates.date) - set(train_set.Sales_date.unique())
unique_stores = train_set.store_nbr.unique()
unique_families = train_set.family.unique()
# Finding missing data
missing_data = list(product(missing_dates, unique_stores, unique_families))
missing_data
[(datetime.date(2014, 12, 25), 1, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 1, 'BABY CARE'), (datetime.date(2014, 12, 25), 1, 'BEAUTY'), (datetime.date(2014, 12, 25), 1, 'BEVERAGES'), (datetime.date(2014, 12, 25), 1, 'BOOKS'), (datetime.date(2014, 12, 25), 1, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 1, 'CELEBRATION'), (datetime.date(2014, 12, 25), 1, 'CLEANING'), (datetime.date(2014, 12, 25), 1, 'DAIRY'), (datetime.date(2014, 12, 25), 1, 'DELI'), (datetime.date(2014, 12, 25), 1, 'EGGS'), (datetime.date(2014, 12, 25), 1, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 1, 'GROCERY I'), (datetime.date(2014, 12, 25), 1, 'GROCERY II'), (datetime.date(2014, 12, 25), 1, 'HARDWARE'), (datetime.date(2014, 12, 25), 1, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 1, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 1, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 1, 'HOME CARE'), (datetime.date(2014, 12, 25), 1, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 1, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 1, 'LINGERIE'), (datetime.date(2014, 12, 25), 1, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 1, 'MAGAZINES'), (datetime.date(2014, 12, 25), 1, 'MEATS'), (datetime.date(2014, 12, 25), 1, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 1, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 1, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 1, 'POULTRY'), (datetime.date(2014, 12, 25), 1, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 1, 'PRODUCE'), (datetime.date(2014, 12, 25), 1, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 1, 'SEAFOOD'), (datetime.date(2014, 12, 25), 10, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 10, 'BABY CARE'), (datetime.date(2014, 12, 25), 10, 'BEAUTY'), (datetime.date(2014, 12, 25), 10, 'BEVERAGES'), (datetime.date(2014, 12, 25), 10, 'BOOKS'), (datetime.date(2014, 12, 25), 10, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 10, 'CELEBRATION'), (datetime.date(2014, 12, 25), 10, 'CLEANING'), (datetime.date(2014, 12, 25), 10, 'DAIRY'), (datetime.date(2014, 12, 25), 10, 'DELI'), (datetime.date(2014, 12, 25), 10, 'EGGS'), (datetime.date(2014, 12, 25), 10, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 10, 'GROCERY I'), (datetime.date(2014, 12, 25), 10, 'GROCERY II'), (datetime.date(2014, 12, 25), 10, 'HARDWARE'), (datetime.date(2014, 12, 25), 10, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 10, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 10, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 10, 'HOME CARE'), (datetime.date(2014, 12, 25), 10, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 10, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 10, 'LINGERIE'), (datetime.date(2014, 12, 25), 10, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 10, 'MAGAZINES'), (datetime.date(2014, 12, 25), 10, 'MEATS'), (datetime.date(2014, 12, 25), 10, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 10, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 10, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 10, 'POULTRY'), (datetime.date(2014, 12, 25), 10, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 10, 'PRODUCE'), (datetime.date(2014, 12, 25), 10, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 10, 'SEAFOOD'), (datetime.date(2014, 12, 25), 11, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 11, 'BABY CARE'), (datetime.date(2014, 12, 25), 11, 'BEAUTY'), (datetime.date(2014, 12, 25), 11, 'BEVERAGES'), (datetime.date(2014, 12, 25), 11, 'BOOKS'), (datetime.date(2014, 12, 25), 11, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 11, 'CELEBRATION'), (datetime.date(2014, 12, 25), 11, 'CLEANING'), (datetime.date(2014, 12, 25), 11, 'DAIRY'), (datetime.date(2014, 12, 25), 11, 'DELI'), (datetime.date(2014, 12, 25), 11, 'EGGS'), (datetime.date(2014, 12, 25), 11, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 11, 'GROCERY I'), (datetime.date(2014, 12, 25), 11, 'GROCERY II'), (datetime.date(2014, 12, 25), 11, 'HARDWARE'), (datetime.date(2014, 12, 25), 11, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 11, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 11, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 11, 'HOME CARE'), (datetime.date(2014, 12, 25), 11, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 11, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 11, 'LINGERIE'), (datetime.date(2014, 12, 25), 11, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 11, 'MAGAZINES'), (datetime.date(2014, 12, 25), 11, 'MEATS'), (datetime.date(2014, 12, 25), 11, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 11, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 11, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 11, 'POULTRY'), (datetime.date(2014, 12, 25), 11, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 11, 'PRODUCE'), (datetime.date(2014, 12, 25), 11, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 11, 'SEAFOOD'), (datetime.date(2014, 12, 25), 12, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 12, 'BABY CARE'), (datetime.date(2014, 12, 25), 12, 'BEAUTY'), (datetime.date(2014, 12, 25), 12, 'BEVERAGES'), (datetime.date(2014, 12, 25), 12, 'BOOKS'), (datetime.date(2014, 12, 25), 12, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 12, 'CELEBRATION'), (datetime.date(2014, 12, 25), 12, 'CLEANING'), (datetime.date(2014, 12, 25), 12, 'DAIRY'), (datetime.date(2014, 12, 25), 12, 'DELI'), (datetime.date(2014, 12, 25), 12, 'EGGS'), (datetime.date(2014, 12, 25), 12, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 12, 'GROCERY I'), (datetime.date(2014, 12, 25), 12, 'GROCERY II'), (datetime.date(2014, 12, 25), 12, 'HARDWARE'), (datetime.date(2014, 12, 25), 12, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 12, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 12, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 12, 'HOME CARE'), (datetime.date(2014, 12, 25), 12, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 12, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 12, 'LINGERIE'), (datetime.date(2014, 12, 25), 12, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 12, 'MAGAZINES'), (datetime.date(2014, 12, 25), 12, 'MEATS'), (datetime.date(2014, 12, 25), 12, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 12, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 12, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 12, 'POULTRY'), (datetime.date(2014, 12, 25), 12, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 12, 'PRODUCE'), (datetime.date(2014, 12, 25), 12, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 12, 'SEAFOOD'), (datetime.date(2014, 12, 25), 13, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 13, 'BABY CARE'), (datetime.date(2014, 12, 25), 13, 'BEAUTY'), (datetime.date(2014, 12, 25), 13, 'BEVERAGES'), (datetime.date(2014, 12, 25), 13, 'BOOKS'), (datetime.date(2014, 12, 25), 13, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 13, 'CELEBRATION'), (datetime.date(2014, 12, 25), 13, 'CLEANING'), (datetime.date(2014, 12, 25), 13, 'DAIRY'), (datetime.date(2014, 12, 25), 13, 'DELI'), (datetime.date(2014, 12, 25), 13, 'EGGS'), (datetime.date(2014, 12, 25), 13, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 13, 'GROCERY I'), (datetime.date(2014, 12, 25), 13, 'GROCERY II'), (datetime.date(2014, 12, 25), 13, 'HARDWARE'), (datetime.date(2014, 12, 25), 13, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 13, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 13, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 13, 'HOME CARE'), (datetime.date(2014, 12, 25), 13, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 13, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 13, 'LINGERIE'), (datetime.date(2014, 12, 25), 13, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 13, 'MAGAZINES'), (datetime.date(2014, 12, 25), 13, 'MEATS'), (datetime.date(2014, 12, 25), 13, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 13, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 13, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 13, 'POULTRY'), (datetime.date(2014, 12, 25), 13, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 13, 'PRODUCE'), (datetime.date(2014, 12, 25), 13, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 13, 'SEAFOOD'), (datetime.date(2014, 12, 25), 14, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 14, 'BABY CARE'), (datetime.date(2014, 12, 25), 14, 'BEAUTY'), (datetime.date(2014, 12, 25), 14, 'BEVERAGES'), (datetime.date(2014, 12, 25), 14, 'BOOKS'), (datetime.date(2014, 12, 25), 14, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 14, 'CELEBRATION'), (datetime.date(2014, 12, 25), 14, 'CLEANING'), (datetime.date(2014, 12, 25), 14, 'DAIRY'), (datetime.date(2014, 12, 25), 14, 'DELI'), (datetime.date(2014, 12, 25), 14, 'EGGS'), (datetime.date(2014, 12, 25), 14, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 14, 'GROCERY I'), (datetime.date(2014, 12, 25), 14, 'GROCERY II'), (datetime.date(2014, 12, 25), 14, 'HARDWARE'), (datetime.date(2014, 12, 25), 14, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 14, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 14, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 14, 'HOME CARE'), (datetime.date(2014, 12, 25), 14, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 14, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 14, 'LINGERIE'), (datetime.date(2014, 12, 25), 14, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 14, 'MAGAZINES'), (datetime.date(2014, 12, 25), 14, 'MEATS'), (datetime.date(2014, 12, 25), 14, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 14, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 14, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 14, 'POULTRY'), (datetime.date(2014, 12, 25), 14, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 14, 'PRODUCE'), (datetime.date(2014, 12, 25), 14, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 14, 'SEAFOOD'), (datetime.date(2014, 12, 25), 15, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 15, 'BABY CARE'), (datetime.date(2014, 12, 25), 15, 'BEAUTY'), (datetime.date(2014, 12, 25), 15, 'BEVERAGES'), (datetime.date(2014, 12, 25), 15, 'BOOKS'), (datetime.date(2014, 12, 25), 15, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 15, 'CELEBRATION'), (datetime.date(2014, 12, 25), 15, 'CLEANING'), (datetime.date(2014, 12, 25), 15, 'DAIRY'), (datetime.date(2014, 12, 25), 15, 'DELI'), (datetime.date(2014, 12, 25), 15, 'EGGS'), (datetime.date(2014, 12, 25), 15, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 15, 'GROCERY I'), (datetime.date(2014, 12, 25), 15, 'GROCERY II'), (datetime.date(2014, 12, 25), 15, 'HARDWARE'), (datetime.date(2014, 12, 25), 15, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 15, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 15, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 15, 'HOME CARE'), (datetime.date(2014, 12, 25), 15, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 15, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 15, 'LINGERIE'), (datetime.date(2014, 12, 25), 15, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 15, 'MAGAZINES'), (datetime.date(2014, 12, 25), 15, 'MEATS'), (datetime.date(2014, 12, 25), 15, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 15, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 15, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 15, 'POULTRY'), (datetime.date(2014, 12, 25), 15, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 15, 'PRODUCE'), (datetime.date(2014, 12, 25), 15, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 15, 'SEAFOOD'), (datetime.date(2014, 12, 25), 16, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 16, 'BABY CARE'), (datetime.date(2014, 12, 25), 16, 'BEAUTY'), (datetime.date(2014, 12, 25), 16, 'BEVERAGES'), (datetime.date(2014, 12, 25), 16, 'BOOKS'), (datetime.date(2014, 12, 25), 16, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 16, 'CELEBRATION'), (datetime.date(2014, 12, 25), 16, 'CLEANING'), (datetime.date(2014, 12, 25), 16, 'DAIRY'), (datetime.date(2014, 12, 25), 16, 'DELI'), (datetime.date(2014, 12, 25), 16, 'EGGS'), (datetime.date(2014, 12, 25), 16, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 16, 'GROCERY I'), (datetime.date(2014, 12, 25), 16, 'GROCERY II'), (datetime.date(2014, 12, 25), 16, 'HARDWARE'), (datetime.date(2014, 12, 25), 16, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 16, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 16, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 16, 'HOME CARE'), (datetime.date(2014, 12, 25), 16, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 16, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 16, 'LINGERIE'), (datetime.date(2014, 12, 25), 16, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 16, 'MAGAZINES'), (datetime.date(2014, 12, 25), 16, 'MEATS'), (datetime.date(2014, 12, 25), 16, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 16, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 16, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 16, 'POULTRY'), (datetime.date(2014, 12, 25), 16, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 16, 'PRODUCE'), (datetime.date(2014, 12, 25), 16, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 16, 'SEAFOOD'), (datetime.date(2014, 12, 25), 17, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 17, 'BABY CARE'), (datetime.date(2014, 12, 25), 17, 'BEAUTY'), (datetime.date(2014, 12, 25), 17, 'BEVERAGES'), (datetime.date(2014, 12, 25), 17, 'BOOKS'), (datetime.date(2014, 12, 25), 17, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 17, 'CELEBRATION'), (datetime.date(2014, 12, 25), 17, 'CLEANING'), (datetime.date(2014, 12, 25), 17, 'DAIRY'), (datetime.date(2014, 12, 25), 17, 'DELI'), (datetime.date(2014, 12, 25), 17, 'EGGS'), (datetime.date(2014, 12, 25), 17, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 17, 'GROCERY I'), (datetime.date(2014, 12, 25), 17, 'GROCERY II'), (datetime.date(2014, 12, 25), 17, 'HARDWARE'), (datetime.date(2014, 12, 25), 17, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 17, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 17, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 17, 'HOME CARE'), (datetime.date(2014, 12, 25), 17, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 17, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 17, 'LINGERIE'), (datetime.date(2014, 12, 25), 17, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 17, 'MAGAZINES'), (datetime.date(2014, 12, 25), 17, 'MEATS'), (datetime.date(2014, 12, 25), 17, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 17, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 17, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 17, 'POULTRY'), (datetime.date(2014, 12, 25), 17, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 17, 'PRODUCE'), (datetime.date(2014, 12, 25), 17, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 17, 'SEAFOOD'), (datetime.date(2014, 12, 25), 18, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 18, 'BABY CARE'), (datetime.date(2014, 12, 25), 18, 'BEAUTY'), (datetime.date(2014, 12, 25), 18, 'BEVERAGES'), (datetime.date(2014, 12, 25), 18, 'BOOKS'), (datetime.date(2014, 12, 25), 18, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 18, 'CELEBRATION'), (datetime.date(2014, 12, 25), 18, 'CLEANING'), (datetime.date(2014, 12, 25), 18, 'DAIRY'), (datetime.date(2014, 12, 25), 18, 'DELI'), (datetime.date(2014, 12, 25), 18, 'EGGS'), (datetime.date(2014, 12, 25), 18, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 18, 'GROCERY I'), (datetime.date(2014, 12, 25), 18, 'GROCERY II'), (datetime.date(2014, 12, 25), 18, 'HARDWARE'), (datetime.date(2014, 12, 25), 18, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 18, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 18, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 18, 'HOME CARE'), (datetime.date(2014, 12, 25), 18, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 18, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 18, 'LINGERIE'), (datetime.date(2014, 12, 25), 18, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 18, 'MAGAZINES'), (datetime.date(2014, 12, 25), 18, 'MEATS'), (datetime.date(2014, 12, 25), 18, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 18, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 18, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 18, 'POULTRY'), (datetime.date(2014, 12, 25), 18, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 18, 'PRODUCE'), (datetime.date(2014, 12, 25), 18, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 18, 'SEAFOOD'), (datetime.date(2014, 12, 25), 19, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 19, 'BABY CARE'), (datetime.date(2014, 12, 25), 19, 'BEAUTY'), (datetime.date(2014, 12, 25), 19, 'BEVERAGES'), (datetime.date(2014, 12, 25), 19, 'BOOKS'), (datetime.date(2014, 12, 25), 19, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 19, 'CELEBRATION'), (datetime.date(2014, 12, 25), 19, 'CLEANING'), (datetime.date(2014, 12, 25), 19, 'DAIRY'), (datetime.date(2014, 12, 25), 19, 'DELI'), (datetime.date(2014, 12, 25), 19, 'EGGS'), (datetime.date(2014, 12, 25), 19, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 19, 'GROCERY I'), (datetime.date(2014, 12, 25), 19, 'GROCERY II'), (datetime.date(2014, 12, 25), 19, 'HARDWARE'), (datetime.date(2014, 12, 25), 19, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 19, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 19, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 19, 'HOME CARE'), (datetime.date(2014, 12, 25), 19, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 19, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 19, 'LINGERIE'), (datetime.date(2014, 12, 25), 19, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 19, 'MAGAZINES'), (datetime.date(2014, 12, 25), 19, 'MEATS'), (datetime.date(2014, 12, 25), 19, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 19, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 19, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 19, 'POULTRY'), (datetime.date(2014, 12, 25), 19, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 19, 'PRODUCE'), (datetime.date(2014, 12, 25), 19, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 19, 'SEAFOOD'), (datetime.date(2014, 12, 25), 2, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 2, 'BABY CARE'), (datetime.date(2014, 12, 25), 2, 'BEAUTY'), (datetime.date(2014, 12, 25), 2, 'BEVERAGES'), (datetime.date(2014, 12, 25), 2, 'BOOKS'), (datetime.date(2014, 12, 25), 2, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 2, 'CELEBRATION'), (datetime.date(2014, 12, 25), 2, 'CLEANING'), (datetime.date(2014, 12, 25), 2, 'DAIRY'), (datetime.date(2014, 12, 25), 2, 'DELI'), (datetime.date(2014, 12, 25), 2, 'EGGS'), (datetime.date(2014, 12, 25), 2, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 2, 'GROCERY I'), (datetime.date(2014, 12, 25), 2, 'GROCERY II'), (datetime.date(2014, 12, 25), 2, 'HARDWARE'), (datetime.date(2014, 12, 25), 2, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 2, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 2, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 2, 'HOME CARE'), (datetime.date(2014, 12, 25), 2, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 2, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 2, 'LINGERIE'), (datetime.date(2014, 12, 25), 2, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 2, 'MAGAZINES'), (datetime.date(2014, 12, 25), 2, 'MEATS'), (datetime.date(2014, 12, 25), 2, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 2, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 2, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 2, 'POULTRY'), (datetime.date(2014, 12, 25), 2, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 2, 'PRODUCE'), (datetime.date(2014, 12, 25), 2, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 2, 'SEAFOOD'), (datetime.date(2014, 12, 25), 20, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 20, 'BABY CARE'), (datetime.date(2014, 12, 25), 20, 'BEAUTY'), (datetime.date(2014, 12, 25), 20, 'BEVERAGES'), (datetime.date(2014, 12, 25), 20, 'BOOKS'), (datetime.date(2014, 12, 25), 20, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 20, 'CELEBRATION'), (datetime.date(2014, 12, 25), 20, 'CLEANING'), (datetime.date(2014, 12, 25), 20, 'DAIRY'), (datetime.date(2014, 12, 25), 20, 'DELI'), (datetime.date(2014, 12, 25), 20, 'EGGS'), (datetime.date(2014, 12, 25), 20, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 20, 'GROCERY I'), (datetime.date(2014, 12, 25), 20, 'GROCERY II'), (datetime.date(2014, 12, 25), 20, 'HARDWARE'), (datetime.date(2014, 12, 25), 20, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 20, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 20, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 20, 'HOME CARE'), (datetime.date(2014, 12, 25), 20, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 20, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 20, 'LINGERIE'), (datetime.date(2014, 12, 25), 20, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 20, 'MAGAZINES'), (datetime.date(2014, 12, 25), 20, 'MEATS'), (datetime.date(2014, 12, 25), 20, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 20, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 20, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 20, 'POULTRY'), (datetime.date(2014, 12, 25), 20, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 20, 'PRODUCE'), (datetime.date(2014, 12, 25), 20, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 20, 'SEAFOOD'), (datetime.date(2014, 12, 25), 21, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 21, 'BABY CARE'), (datetime.date(2014, 12, 25), 21, 'BEAUTY'), (datetime.date(2014, 12, 25), 21, 'BEVERAGES'), (datetime.date(2014, 12, 25), 21, 'BOOKS'), (datetime.date(2014, 12, 25), 21, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 21, 'CELEBRATION'), (datetime.date(2014, 12, 25), 21, 'CLEANING'), (datetime.date(2014, 12, 25), 21, 'DAIRY'), (datetime.date(2014, 12, 25), 21, 'DELI'), (datetime.date(2014, 12, 25), 21, 'EGGS'), (datetime.date(2014, 12, 25), 21, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 21, 'GROCERY I'), (datetime.date(2014, 12, 25), 21, 'GROCERY II'), (datetime.date(2014, 12, 25), 21, 'HARDWARE'), (datetime.date(2014, 12, 25), 21, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 21, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 21, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 21, 'HOME CARE'), (datetime.date(2014, 12, 25), 21, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 21, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 21, 'LINGERIE'), (datetime.date(2014, 12, 25), 21, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 21, 'MAGAZINES'), (datetime.date(2014, 12, 25), 21, 'MEATS'), (datetime.date(2014, 12, 25), 21, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 21, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 21, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 21, 'POULTRY'), (datetime.date(2014, 12, 25), 21, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 21, 'PRODUCE'), (datetime.date(2014, 12, 25), 21, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 21, 'SEAFOOD'), (datetime.date(2014, 12, 25), 22, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 22, 'BABY CARE'), (datetime.date(2014, 12, 25), 22, 'BEAUTY'), (datetime.date(2014, 12, 25), 22, 'BEVERAGES'), (datetime.date(2014, 12, 25), 22, 'BOOKS'), (datetime.date(2014, 12, 25), 22, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 22, 'CELEBRATION'), (datetime.date(2014, 12, 25), 22, 'CLEANING'), (datetime.date(2014, 12, 25), 22, 'DAIRY'), (datetime.date(2014, 12, 25), 22, 'DELI'), (datetime.date(2014, 12, 25), 22, 'EGGS'), (datetime.date(2014, 12, 25), 22, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 22, 'GROCERY I'), (datetime.date(2014, 12, 25), 22, 'GROCERY II'), (datetime.date(2014, 12, 25), 22, 'HARDWARE'), (datetime.date(2014, 12, 25), 22, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 22, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 22, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 22, 'HOME CARE'), (datetime.date(2014, 12, 25), 22, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 22, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 22, 'LINGERIE'), (datetime.date(2014, 12, 25), 22, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 22, 'MAGAZINES'), (datetime.date(2014, 12, 25), 22, 'MEATS'), (datetime.date(2014, 12, 25), 22, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 22, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 22, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 22, 'POULTRY'), (datetime.date(2014, 12, 25), 22, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 22, 'PRODUCE'), (datetime.date(2014, 12, 25), 22, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 22, 'SEAFOOD'), (datetime.date(2014, 12, 25), 23, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 23, 'BABY CARE'), (datetime.date(2014, 12, 25), 23, 'BEAUTY'), (datetime.date(2014, 12, 25), 23, 'BEVERAGES'), (datetime.date(2014, 12, 25), 23, 'BOOKS'), (datetime.date(2014, 12, 25), 23, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 23, 'CELEBRATION'), (datetime.date(2014, 12, 25), 23, 'CLEANING'), (datetime.date(2014, 12, 25), 23, 'DAIRY'), (datetime.date(2014, 12, 25), 23, 'DELI'), (datetime.date(2014, 12, 25), 23, 'EGGS'), (datetime.date(2014, 12, 25), 23, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 23, 'GROCERY I'), (datetime.date(2014, 12, 25), 23, 'GROCERY II'), (datetime.date(2014, 12, 25), 23, 'HARDWARE'), (datetime.date(2014, 12, 25), 23, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 23, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 23, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 23, 'HOME CARE'), (datetime.date(2014, 12, 25), 23, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 23, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 23, 'LINGERIE'), (datetime.date(2014, 12, 25), 23, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 23, 'MAGAZINES'), (datetime.date(2014, 12, 25), 23, 'MEATS'), (datetime.date(2014, 12, 25), 23, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 23, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 23, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 23, 'POULTRY'), (datetime.date(2014, 12, 25), 23, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 23, 'PRODUCE'), (datetime.date(2014, 12, 25), 23, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 23, 'SEAFOOD'), (datetime.date(2014, 12, 25), 24, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 24, 'BABY CARE'), (datetime.date(2014, 12, 25), 24, 'BEAUTY'), (datetime.date(2014, 12, 25), 24, 'BEVERAGES'), (datetime.date(2014, 12, 25), 24, 'BOOKS'), (datetime.date(2014, 12, 25), 24, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 24, 'CELEBRATION'), (datetime.date(2014, 12, 25), 24, 'CLEANING'), (datetime.date(2014, 12, 25), 24, 'DAIRY'), (datetime.date(2014, 12, 25), 24, 'DELI'), (datetime.date(2014, 12, 25), 24, 'EGGS'), (datetime.date(2014, 12, 25), 24, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 24, 'GROCERY I'), (datetime.date(2014, 12, 25), 24, 'GROCERY II'), (datetime.date(2014, 12, 25), 24, 'HARDWARE'), (datetime.date(2014, 12, 25), 24, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 24, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 24, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 24, 'HOME CARE'), (datetime.date(2014, 12, 25), 24, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 24, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 24, 'LINGERIE'), (datetime.date(2014, 12, 25), 24, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 24, 'MAGAZINES'), (datetime.date(2014, 12, 25), 24, 'MEATS'), (datetime.date(2014, 12, 25), 24, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 24, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 24, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 24, 'POULTRY'), (datetime.date(2014, 12, 25), 24, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 24, 'PRODUCE'), (datetime.date(2014, 12, 25), 24, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 24, 'SEAFOOD'), (datetime.date(2014, 12, 25), 25, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 25, 'BABY CARE'), (datetime.date(2014, 12, 25), 25, 'BEAUTY'), (datetime.date(2014, 12, 25), 25, 'BEVERAGES'), (datetime.date(2014, 12, 25), 25, 'BOOKS'), (datetime.date(2014, 12, 25), 25, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 25, 'CELEBRATION'), (datetime.date(2014, 12, 25), 25, 'CLEANING'), (datetime.date(2014, 12, 25), 25, 'DAIRY'), (datetime.date(2014, 12, 25), 25, 'DELI'), (datetime.date(2014, 12, 25), 25, 'EGGS'), (datetime.date(2014, 12, 25), 25, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 25, 'GROCERY I'), (datetime.date(2014, 12, 25), 25, 'GROCERY II'), (datetime.date(2014, 12, 25), 25, 'HARDWARE'), (datetime.date(2014, 12, 25), 25, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 25, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 25, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 25, 'HOME CARE'), (datetime.date(2014, 12, 25), 25, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 25, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 25, 'LINGERIE'), (datetime.date(2014, 12, 25), 25, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 25, 'MAGAZINES'), (datetime.date(2014, 12, 25), 25, 'MEATS'), (datetime.date(2014, 12, 25), 25, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 25, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 25, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 25, 'POULTRY'), (datetime.date(2014, 12, 25), 25, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 25, 'PRODUCE'), (datetime.date(2014, 12, 25), 25, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 25, 'SEAFOOD'), (datetime.date(2014, 12, 25), 26, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 26, 'BABY CARE'), (datetime.date(2014, 12, 25), 26, 'BEAUTY'), (datetime.date(2014, 12, 25), 26, 'BEVERAGES'), (datetime.date(2014, 12, 25), 26, 'BOOKS'), (datetime.date(2014, 12, 25), 26, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 26, 'CELEBRATION'), (datetime.date(2014, 12, 25), 26, 'CLEANING'), (datetime.date(2014, 12, 25), 26, 'DAIRY'), (datetime.date(2014, 12, 25), 26, 'DELI'), (datetime.date(2014, 12, 25), 26, 'EGGS'), (datetime.date(2014, 12, 25), 26, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 26, 'GROCERY I'), (datetime.date(2014, 12, 25), 26, 'GROCERY II'), (datetime.date(2014, 12, 25), 26, 'HARDWARE'), (datetime.date(2014, 12, 25), 26, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 26, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 26, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 26, 'HOME CARE'), (datetime.date(2014, 12, 25), 26, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 26, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 26, 'LINGERIE'), (datetime.date(2014, 12, 25), 26, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 26, 'MAGAZINES'), (datetime.date(2014, 12, 25), 26, 'MEATS'), (datetime.date(2014, 12, 25), 26, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 26, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 26, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 26, 'POULTRY'), (datetime.date(2014, 12, 25), 26, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 26, 'PRODUCE'), (datetime.date(2014, 12, 25), 26, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 26, 'SEAFOOD'), (datetime.date(2014, 12, 25), 27, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 27, 'BABY CARE'), (datetime.date(2014, 12, 25), 27, 'BEAUTY'), (datetime.date(2014, 12, 25), 27, 'BEVERAGES'), (datetime.date(2014, 12, 25), 27, 'BOOKS'), (datetime.date(2014, 12, 25), 27, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 27, 'CELEBRATION'), (datetime.date(2014, 12, 25), 27, 'CLEANING'), (datetime.date(2014, 12, 25), 27, 'DAIRY'), (datetime.date(2014, 12, 25), 27, 'DELI'), (datetime.date(2014, 12, 25), 27, 'EGGS'), (datetime.date(2014, 12, 25), 27, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 27, 'GROCERY I'), (datetime.date(2014, 12, 25), 27, 'GROCERY II'), (datetime.date(2014, 12, 25), 27, 'HARDWARE'), (datetime.date(2014, 12, 25), 27, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 27, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 27, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 27, 'HOME CARE'), (datetime.date(2014, 12, 25), 27, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 27, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 27, 'LINGERIE'), (datetime.date(2014, 12, 25), 27, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 27, 'MAGAZINES'), (datetime.date(2014, 12, 25), 27, 'MEATS'), (datetime.date(2014, 12, 25), 27, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 27, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 27, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 27, 'POULTRY'), (datetime.date(2014, 12, 25), 27, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 27, 'PRODUCE'), (datetime.date(2014, 12, 25), 27, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 27, 'SEAFOOD'), (datetime.date(2014, 12, 25), 28, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 28, 'BABY CARE'), (datetime.date(2014, 12, 25), 28, 'BEAUTY'), (datetime.date(2014, 12, 25), 28, 'BEVERAGES'), (datetime.date(2014, 12, 25), 28, 'BOOKS'), (datetime.date(2014, 12, 25), 28, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 28, 'CELEBRATION'), (datetime.date(2014, 12, 25), 28, 'CLEANING'), (datetime.date(2014, 12, 25), 28, 'DAIRY'), (datetime.date(2014, 12, 25), 28, 'DELI'), (datetime.date(2014, 12, 25), 28, 'EGGS'), (datetime.date(2014, 12, 25), 28, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 28, 'GROCERY I'), (datetime.date(2014, 12, 25), 28, 'GROCERY II'), (datetime.date(2014, 12, 25), 28, 'HARDWARE'), (datetime.date(2014, 12, 25), 28, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 28, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 28, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 28, 'HOME CARE'), (datetime.date(2014, 12, 25), 28, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 28, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 28, 'LINGERIE'), (datetime.date(2014, 12, 25), 28, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 28, 'MAGAZINES'), (datetime.date(2014, 12, 25), 28, 'MEATS'), (datetime.date(2014, 12, 25), 28, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 28, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 28, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 28, 'POULTRY'), (datetime.date(2014, 12, 25), 28, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 28, 'PRODUCE'), (datetime.date(2014, 12, 25), 28, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 28, 'SEAFOOD'), (datetime.date(2014, 12, 25), 29, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 29, 'BABY CARE'), (datetime.date(2014, 12, 25), 29, 'BEAUTY'), (datetime.date(2014, 12, 25), 29, 'BEVERAGES'), (datetime.date(2014, 12, 25), 29, 'BOOKS'), (datetime.date(2014, 12, 25), 29, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 29, 'CELEBRATION'), (datetime.date(2014, 12, 25), 29, 'CLEANING'), (datetime.date(2014, 12, 25), 29, 'DAIRY'), (datetime.date(2014, 12, 25), 29, 'DELI'), (datetime.date(2014, 12, 25), 29, 'EGGS'), (datetime.date(2014, 12, 25), 29, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 29, 'GROCERY I'), (datetime.date(2014, 12, 25), 29, 'GROCERY II'), (datetime.date(2014, 12, 25), 29, 'HARDWARE'), (datetime.date(2014, 12, 25), 29, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 29, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 29, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 29, 'HOME CARE'), (datetime.date(2014, 12, 25), 29, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 29, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 29, 'LINGERIE'), (datetime.date(2014, 12, 25), 29, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 29, 'MAGAZINES'), (datetime.date(2014, 12, 25), 29, 'MEATS'), (datetime.date(2014, 12, 25), 29, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 29, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 29, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 29, 'POULTRY'), (datetime.date(2014, 12, 25), 29, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 29, 'PRODUCE'), (datetime.date(2014, 12, 25), 29, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 29, 'SEAFOOD'), (datetime.date(2014, 12, 25), 3, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 3, 'BABY CARE'), (datetime.date(2014, 12, 25), 3, 'BEAUTY'), (datetime.date(2014, 12, 25), 3, 'BEVERAGES'), (datetime.date(2014, 12, 25), 3, 'BOOKS'), (datetime.date(2014, 12, 25), 3, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 3, 'CELEBRATION'), (datetime.date(2014, 12, 25), 3, 'CLEANING'), (datetime.date(2014, 12, 25), 3, 'DAIRY'), (datetime.date(2014, 12, 25), 3, 'DELI'), (datetime.date(2014, 12, 25), 3, 'EGGS'), (datetime.date(2014, 12, 25), 3, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 3, 'GROCERY I'), (datetime.date(2014, 12, 25), 3, 'GROCERY II'), (datetime.date(2014, 12, 25), 3, 'HARDWARE'), (datetime.date(2014, 12, 25), 3, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 3, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 3, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 3, 'HOME CARE'), (datetime.date(2014, 12, 25), 3, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 3, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 3, 'LINGERIE'), (datetime.date(2014, 12, 25), 3, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 3, 'MAGAZINES'), (datetime.date(2014, 12, 25), 3, 'MEATS'), (datetime.date(2014, 12, 25), 3, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 3, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 3, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 3, 'POULTRY'), (datetime.date(2014, 12, 25), 3, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 3, 'PRODUCE'), (datetime.date(2014, 12, 25), 3, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 3, 'SEAFOOD'), (datetime.date(2014, 12, 25), 30, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 30, 'BABY CARE'), (datetime.date(2014, 12, 25), 30, 'BEAUTY'), (datetime.date(2014, 12, 25), 30, 'BEVERAGES'), (datetime.date(2014, 12, 25), 30, 'BOOKS'), (datetime.date(2014, 12, 25), 30, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 30, 'CELEBRATION'), (datetime.date(2014, 12, 25), 30, 'CLEANING'), (datetime.date(2014, 12, 25), 30, 'DAIRY'), (datetime.date(2014, 12, 25), 30, 'DELI'), (datetime.date(2014, 12, 25), 30, 'EGGS'), (datetime.date(2014, 12, 25), 30, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 30, 'GROCERY I'), (datetime.date(2014, 12, 25), 30, 'GROCERY II'), (datetime.date(2014, 12, 25), 30, 'HARDWARE'), (datetime.date(2014, 12, 25), 30, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 30, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 30, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 30, 'HOME CARE'), (datetime.date(2014, 12, 25), 30, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 30, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 30, 'LINGERIE'), (datetime.date(2014, 12, 25), 30, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 30, 'MAGAZINES'), (datetime.date(2014, 12, 25), 30, 'MEATS'), (datetime.date(2014, 12, 25), 30, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 30, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 30, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 30, 'POULTRY'), (datetime.date(2014, 12, 25), 30, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 30, 'PRODUCE'), (datetime.date(2014, 12, 25), 30, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 30, 'SEAFOOD'), (datetime.date(2014, 12, 25), 31, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 31, 'BABY CARE'), (datetime.date(2014, 12, 25), 31, 'BEAUTY'), (datetime.date(2014, 12, 25), 31, 'BEVERAGES'), (datetime.date(2014, 12, 25), 31, 'BOOKS'), (datetime.date(2014, 12, 25), 31, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 31, 'CELEBRATION'), (datetime.date(2014, 12, 25), 31, 'CLEANING'), (datetime.date(2014, 12, 25), 31, 'DAIRY'), (datetime.date(2014, 12, 25), 31, 'DELI'), (datetime.date(2014, 12, 25), 31, 'EGGS'), (datetime.date(2014, 12, 25), 31, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 31, 'GROCERY I'), (datetime.date(2014, 12, 25), 31, 'GROCERY II'), (datetime.date(2014, 12, 25), 31, 'HARDWARE'), (datetime.date(2014, 12, 25), 31, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 31, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 31, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 31, 'HOME CARE'), (datetime.date(2014, 12, 25), 31, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 31, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 31, 'LINGERIE'), (datetime.date(2014, 12, 25), 31, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 31, 'MAGAZINES'), (datetime.date(2014, 12, 25), 31, 'MEATS'), (datetime.date(2014, 12, 25), 31, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 31, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 31, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 31, 'POULTRY'), (datetime.date(2014, 12, 25), 31, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 31, 'PRODUCE'), (datetime.date(2014, 12, 25), 31, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 31, 'SEAFOOD'), (datetime.date(2014, 12, 25), 32, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 32, 'BABY CARE'), (datetime.date(2014, 12, 25), 32, 'BEAUTY'), (datetime.date(2014, 12, 25), 32, 'BEVERAGES'), (datetime.date(2014, 12, 25), 32, 'BOOKS'), (datetime.date(2014, 12, 25), 32, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 32, 'CELEBRATION'), (datetime.date(2014, 12, 25), 32, 'CLEANING'), (datetime.date(2014, 12, 25), 32, 'DAIRY'), (datetime.date(2014, 12, 25), 32, 'DELI'), (datetime.date(2014, 12, 25), 32, 'EGGS'), (datetime.date(2014, 12, 25), 32, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 32, 'GROCERY I'), (datetime.date(2014, 12, 25), 32, 'GROCERY II'), (datetime.date(2014, 12, 25), 32, 'HARDWARE'), (datetime.date(2014, 12, 25), 32, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 32, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 32, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 32, 'HOME CARE'), (datetime.date(2014, 12, 25), 32, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 32, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 32, 'LINGERIE'), (datetime.date(2014, 12, 25), 32, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 32, 'MAGAZINES'), (datetime.date(2014, 12, 25), 32, 'MEATS'), (datetime.date(2014, 12, 25), 32, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 32, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 32, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 32, 'POULTRY'), (datetime.date(2014, 12, 25), 32, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 32, 'PRODUCE'), (datetime.date(2014, 12, 25), 32, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 32, 'SEAFOOD'), (datetime.date(2014, 12, 25), 33, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 33, 'BABY CARE'), (datetime.date(2014, 12, 25), 33, 'BEAUTY'), (datetime.date(2014, 12, 25), 33, 'BEVERAGES'), (datetime.date(2014, 12, 25), 33, 'BOOKS'), (datetime.date(2014, 12, 25), 33, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 33, 'CELEBRATION'), (datetime.date(2014, 12, 25), 33, 'CLEANING'), (datetime.date(2014, 12, 25), 33, 'DAIRY'), (datetime.date(2014, 12, 25), 33, 'DELI'), (datetime.date(2014, 12, 25), 33, 'EGGS'), (datetime.date(2014, 12, 25), 33, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 33, 'GROCERY I'), (datetime.date(2014, 12, 25), 33, 'GROCERY II'), (datetime.date(2014, 12, 25), 33, 'HARDWARE'), (datetime.date(2014, 12, 25), 33, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 33, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 33, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 33, 'HOME CARE'), (datetime.date(2014, 12, 25), 33, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 33, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 33, 'LINGERIE'), (datetime.date(2014, 12, 25), 33, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 33, 'MAGAZINES'), (datetime.date(2014, 12, 25), 33, 'MEATS'), (datetime.date(2014, 12, 25), 33, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 33, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 33, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 33, 'POULTRY'), (datetime.date(2014, 12, 25), 33, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 33, 'PRODUCE'), (datetime.date(2014, 12, 25), 33, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 33, 'SEAFOOD'), (datetime.date(2014, 12, 25), 34, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 34, 'BABY CARE'), (datetime.date(2014, 12, 25), 34, 'BEAUTY'), (datetime.date(2014, 12, 25), 34, 'BEVERAGES'), (datetime.date(2014, 12, 25), 34, 'BOOKS'), (datetime.date(2014, 12, 25), 34, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 34, 'CELEBRATION'), (datetime.date(2014, 12, 25), 34, 'CLEANING'), (datetime.date(2014, 12, 25), 34, 'DAIRY'), (datetime.date(2014, 12, 25), 34, 'DELI'), (datetime.date(2014, 12, 25), 34, 'EGGS'), (datetime.date(2014, 12, 25), 34, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 34, 'GROCERY I'), (datetime.date(2014, 12, 25), 34, 'GROCERY II'), (datetime.date(2014, 12, 25), 34, 'HARDWARE'), (datetime.date(2014, 12, 25), 34, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 34, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 34, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 34, 'HOME CARE'), (datetime.date(2014, 12, 25), 34, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 34, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 34, 'LINGERIE'), (datetime.date(2014, 12, 25), 34, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 34, 'MAGAZINES'), (datetime.date(2014, 12, 25), 34, 'MEATS'), (datetime.date(2014, 12, 25), 34, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 34, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 34, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 34, 'POULTRY'), (datetime.date(2014, 12, 25), 34, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 34, 'PRODUCE'), (datetime.date(2014, 12, 25), 34, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 34, 'SEAFOOD'), (datetime.date(2014, 12, 25), 35, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 35, 'BABY CARE'), (datetime.date(2014, 12, 25), 35, 'BEAUTY'), (datetime.date(2014, 12, 25), 35, 'BEVERAGES'), (datetime.date(2014, 12, 25), 35, 'BOOKS'), (datetime.date(2014, 12, 25), 35, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 35, 'CELEBRATION'), (datetime.date(2014, 12, 25), 35, 'CLEANING'), (datetime.date(2014, 12, 25), 35, 'DAIRY'), (datetime.date(2014, 12, 25), 35, 'DELI'), (datetime.date(2014, 12, 25), 35, 'EGGS'), (datetime.date(2014, 12, 25), 35, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 35, 'GROCERY I'), (datetime.date(2014, 12, 25), 35, 'GROCERY II'), (datetime.date(2014, 12, 25), 35, 'HARDWARE'), (datetime.date(2014, 12, 25), 35, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 35, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 35, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 35, 'HOME CARE'), (datetime.date(2014, 12, 25), 35, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 35, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 35, 'LINGERIE'), (datetime.date(2014, 12, 25), 35, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 35, 'MAGAZINES'), (datetime.date(2014, 12, 25), 35, 'MEATS'), (datetime.date(2014, 12, 25), 35, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 35, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 35, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 35, 'POULTRY'), (datetime.date(2014, 12, 25), 35, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 35, 'PRODUCE'), (datetime.date(2014, 12, 25), 35, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 35, 'SEAFOOD'), (datetime.date(2014, 12, 25), 36, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 36, 'BABY CARE'), (datetime.date(2014, 12, 25), 36, 'BEAUTY'), (datetime.date(2014, 12, 25), 36, 'BEVERAGES'), (datetime.date(2014, 12, 25), 36, 'BOOKS'), (datetime.date(2014, 12, 25), 36, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 36, 'CELEBRATION'), (datetime.date(2014, 12, 25), 36, 'CLEANING'), (datetime.date(2014, 12, 25), 36, 'DAIRY'), (datetime.date(2014, 12, 25), 36, 'DELI'), (datetime.date(2014, 12, 25), 36, 'EGGS'), (datetime.date(2014, 12, 25), 36, 'FROZEN FOODS'), (datetime.date(2014, 12, 25), 36, 'GROCERY I'), (datetime.date(2014, 12, 25), 36, 'GROCERY II'), (datetime.date(2014, 12, 25), 36, 'HARDWARE'), (datetime.date(2014, 12, 25), 36, 'HOME AND KITCHEN I'), (datetime.date(2014, 12, 25), 36, 'HOME AND KITCHEN II'), (datetime.date(2014, 12, 25), 36, 'HOME APPLIANCES'), (datetime.date(2014, 12, 25), 36, 'HOME CARE'), (datetime.date(2014, 12, 25), 36, 'LADIESWEAR'), (datetime.date(2014, 12, 25), 36, 'LAWN AND GARDEN'), (datetime.date(2014, 12, 25), 36, 'LINGERIE'), (datetime.date(2014, 12, 25), 36, 'LIQUOR,WINE,BEER'), (datetime.date(2014, 12, 25), 36, 'MAGAZINES'), (datetime.date(2014, 12, 25), 36, 'MEATS'), (datetime.date(2014, 12, 25), 36, 'PERSONAL CARE'), (datetime.date(2014, 12, 25), 36, 'PET SUPPLIES'), (datetime.date(2014, 12, 25), 36, 'PLAYERS AND ELECTRONICS'), (datetime.date(2014, 12, 25), 36, 'POULTRY'), (datetime.date(2014, 12, 25), 36, 'PREPARED FOODS'), (datetime.date(2014, 12, 25), 36, 'PRODUCE'), (datetime.date(2014, 12, 25), 36, 'SCHOOL AND OFFICE SUPPLIES'), (datetime.date(2014, 12, 25), 36, 'SEAFOOD'), (datetime.date(2014, 12, 25), 37, 'AUTOMOTIVE'), (datetime.date(2014, 12, 25), 37, 'BABY CARE'), (datetime.date(2014, 12, 25), 37, 'BEAUTY'), (datetime.date(2014, 12, 25), 37, 'BEVERAGES'), (datetime.date(2014, 12, 25), 37, 'BOOKS'), (datetime.date(2014, 12, 25), 37, 'BREAD/BAKERY'), (datetime.date(2014, 12, 25), 37, 'CELEBRATION'), (datetime.date(2014, 12, 25), 37, 'CLEANING'), (datetime.date(2014, 12, 25), 37, 'DAIRY'), (datetime.date(2014, 12, 25), 37, 'DELI'), ...]
new_data = pd.DataFrame(missing_data, columns = ['Sales_date', 'store_nbr', 'family'])
new_data
| Sales_date | store_nbr | family | |
|---|---|---|---|
| 0 | 2014-12-25 | 1 | AUTOMOTIVE |
| 1 | 2014-12-25 | 1 | BABY CARE |
| 2 | 2014-12-25 | 1 | BEAUTY |
| 3 | 2014-12-25 | 1 | BEVERAGES |
| 4 | 2014-12-25 | 1 | BOOKS |
| ... | ... | ... | ... |
| 7123 | 2015-12-25 | 9 | POULTRY |
| 7124 | 2015-12-25 | 9 | PREPARED FOODS |
| 7125 | 2015-12-25 | 9 | PRODUCE |
| 7126 | 2015-12-25 | 9 | SCHOOL AND OFFICE SUPPLIES |
| 7127 | 2015-12-25 | 9 | SEAFOOD |
7128 rows × 3 columns
# Merging trainset with new data which has filled missing data
merged_data = pd.concat([train_set, new_data], ignore_index=False)
merged_data
| id | date | store_nbr | family | sales | onpromotion | Sales_date | |
|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 |
| 1 | 1.0 | 2013-01-01 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 |
| 2 | 2.0 | 2013-01-01 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 |
| 3 | 3.0 | 2013-01-01 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 |
| 4 | 4.0 | 2013-01-01 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | NaN | NaN | 9 | POULTRY | NaN | NaN | 2015-12-25 |
| 7124 | NaN | NaN | 9 | PREPARED FOODS | NaN | NaN | 2015-12-25 |
| 7125 | NaN | NaN | 9 | PRODUCE | NaN | NaN | 2015-12-25 |
| 7126 | NaN | NaN | 9 | SCHOOL AND OFFICE SUPPLIES | NaN | NaN | 2015-12-25 |
| 7127 | NaN | NaN | 9 | SEAFOOD | NaN | NaN | 2015-12-25 |
3008016 rows × 7 columns
# Dropping columns "id" and "date" in merged data column
merged_data.drop(columns = ["id", "date"], inplace = True)
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | |
|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 |
| ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | NaN | NaN | 2015-12-25 |
| 7124 | 9 | PREPARED FOODS | NaN | NaN | 2015-12-25 |
| 7125 | 9 | PRODUCE | NaN | NaN | 2015-12-25 |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | NaN | NaN | 2015-12-25 |
| 7127 | 9 | SEAFOOD | NaN | NaN | 2015-12-25 |
3008016 rows × 5 columns
# Adding the City column to the merged data since I want to find the impact of promotions of sales in different cities
merged_data['City'] = 'NaN'
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | NaN |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | NaN |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | NaN |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | NaN |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | NaN |
| ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | NaN | NaN | 2015-12-25 | NaN |
| 7124 | 9 | PREPARED FOODS | NaN | NaN | 2015-12-25 | NaN |
| 7125 | 9 | PRODUCE | NaN | NaN | 2015-12-25 | NaN |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | NaN | NaN | 2015-12-25 | NaN |
| 7127 | 9 | SEAFOOD | NaN | NaN | 2015-12-25 | NaN |
3008016 rows × 6 columns
merged_data["City"] = merged_data['store_nbr'].map({54: 'El Carmen',53: 'Manta',52: 'Manta',51: 'Guayaquil', 50: 'Ambato',
49:'Quito', 48: 'Quito',47: 'Quito', 46: 'Quito', 45: 'Quito', 45: 'Quito',
44:'Quito', 43:'Esmeraldas', 42:'Cuenca', 41:'Machala', 40:'Machala', 39:'Cuenca',
38:'Loja', 37:'Cuenca',36:'Libertad', 35: 'Playas', 34:'Guayaquil', 33:'Quevedo',
32:'Guayaquil', 31:'Babahoyo', 30:'Guayaquil', 29:'Guayaquil', 28:'Guayaquil',
27:'Daule', 26:'Guayaquil',25:'Salinas', 24:'Guayaquil', 23:'Ambato', 22:'Puyo',
21:'Santo Domingo', 20:'Quito', 19:'Guaranda', 18:'Quito', 17:'Quito', 16:'Santo Domingo',
15:'Ibarra', 14:'Riobamba', 13:'Latacunga',12:'Latacunga', 11:'Cayambe', 10:'Quito',9:'Quito',
8:'Quito',7:'Quito',6:'Quito',5:'Santo Domingo',4:'Quito', 3:'Quito', 2:'Quito', 1:'Quito'})
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito |
| ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | NaN | NaN | 2015-12-25 | Quito |
| 7124 | 9 | PREPARED FOODS | NaN | NaN | 2015-12-25 | Quito |
| 7125 | 9 | PRODUCE | NaN | NaN | 2015-12-25 | Quito |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | NaN | NaN | 2015-12-25 | Quito |
| 7127 | 9 | SEAFOOD | NaN | NaN | 2015-12-25 | Quito |
3008016 rows × 6 columns
merged_data = merged_data.fillna(0)
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito |
| ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.0 | 0.0 | 2015-12-25 | Quito |
| 7124 | 9 | PREPARED FOODS | 0.0 | 0.0 | 2015-12-25 | Quito |
| 7125 | 9 | PRODUCE | 0.0 | 0.0 | 2015-12-25 | Quito |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.0 | 0.0 | 2015-12-25 | Quito |
| 7127 | 9 | SEAFOOD | 0.0 | 0.0 | 2015-12-25 | Quito |
3008016 rows × 6 columns
sample_1 = merged_data[merged_data['onpromotion'] !=0].sort_values('onpromotion', ascending = False)
sample_1
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 2216622 | 53 | GROCERY I | 7517.000 | 741.0 | 2016-05-31 | Manta |
| 2200584 | 53 | GROCERY I | 6044.000 | 726.0 | 2016-05-22 | Manta |
| 2211276 | 53 | GROCERY I | 5846.000 | 722.0 | 2016-05-28 | Manta |
| 2188110 | 53 | GROCERY I | 6154.000 | 720.0 | 2016-05-15 | Manta |
| 2213058 | 53 | GROCERY I | 6681.000 | 719.0 | 2016-05-29 | Manta |
| ... | ... | ... | ... | ... | ... | ... |
| 2503758 | 10 | HOME AND KITCHEN I | 6.000 | 1.0 | 2016-11-09 | Quito |
| 2139309 | 34 | HOME CARE | 459.000 | 1.0 | 2016-04-18 | Guayaquil |
| 1619448 | 48 | CELEBRATION | 15.000 | 1.0 | 2015-06-30 | Quito |
| 1619439 | 47 | PRODUCE | 4706.284 | 1.0 | 2015-06-30 | Quito |
| 1500143 | 5 | PREPARED FOODS | 102.720 | 1.0 | 2015-04-24 | Santo Domingo |
611329 rows × 6 columns
sample_1.head()
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 2216622 | 53 | GROCERY I | 7517.0 | 741.0 | 2016-05-31 | Manta |
| 2200584 | 53 | GROCERY I | 6044.0 | 726.0 | 2016-05-22 | Manta |
| 2211276 | 53 | GROCERY I | 5846.0 | 722.0 | 2016-05-28 | Manta |
| 2188110 | 53 | GROCERY I | 6154.0 | 720.0 | 2016-05-15 | Manta |
| 2213058 | 53 | GROCERY I | 6681.0 | 719.0 | 2016-05-29 | Manta |
stores.head()
| store_nbr | city | state | type | cluster | |
|---|---|---|---|---|---|
| 0 | 1 | Quito | Pichincha | D | 13 |
| 1 | 2 | Quito | Pichincha | D | 13 |
| 2 | 3 | Quito | Pichincha | D | 8 |
| 3 | 4 | Quito | Pichincha | D | 9 |
| 4 | 5 | Santo Domingo | Santo Domingo de los Tsachilas | D | 4 |
sample_1
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 2216622 | 53 | GROCERY I | 7517.000 | 741.0 | 2016-05-31 | Manta |
| 2200584 | 53 | GROCERY I | 6044.000 | 726.0 | 2016-05-22 | Manta |
| 2211276 | 53 | GROCERY I | 5846.000 | 722.0 | 2016-05-28 | Manta |
| 2188110 | 53 | GROCERY I | 6154.000 | 720.0 | 2016-05-15 | Manta |
| 2213058 | 53 | GROCERY I | 6681.000 | 719.0 | 2016-05-29 | Manta |
| ... | ... | ... | ... | ... | ... | ... |
| 2503758 | 10 | HOME AND KITCHEN I | 6.000 | 1.0 | 2016-11-09 | Quito |
| 2139309 | 34 | HOME CARE | 459.000 | 1.0 | 2016-04-18 | Guayaquil |
| 1619448 | 48 | CELEBRATION | 15.000 | 1.0 | 2015-06-30 | Quito |
| 1619439 | 47 | PRODUCE | 4706.284 | 1.0 | 2015-06-30 | Quito |
| 1500143 | 5 | PREPARED FOODS | 102.720 | 1.0 | 2015-04-24 | Santo Domingo |
611329 rows × 6 columns
x = sample_1['City'].value_counts()[:10].index
y = sample_1['City'].value_counts()[:10].values
plt.bar(x,y)
plt.rcParams['figure.figsize'] = (25,10)
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.title('Promotion Influence', fontsize = 19, fontweight = 'bold')
plt.xlabel('Cities', fontsize = 19, fontweight = 'bold')
plt.ylabel('Sales', fontsize = 19, fontweight = 'bold')
Text(0, 0.5, 'Sales')
sample_1
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 2216622 | 53 | GROCERY I | 7517.000 | 741.0 | 2016-05-31 | Manta |
| 2200584 | 53 | GROCERY I | 6044.000 | 726.0 | 2016-05-22 | Manta |
| 2211276 | 53 | GROCERY I | 5846.000 | 722.0 | 2016-05-28 | Manta |
| 2188110 | 53 | GROCERY I | 6154.000 | 720.0 | 2016-05-15 | Manta |
| 2213058 | 53 | GROCERY I | 6681.000 | 719.0 | 2016-05-29 | Manta |
| ... | ... | ... | ... | ... | ... | ... |
| 2503758 | 10 | HOME AND KITCHEN I | 6.000 | 1.0 | 2016-11-09 | Quito |
| 2139309 | 34 | HOME CARE | 459.000 | 1.0 | 2016-04-18 | Guayaquil |
| 1619448 | 48 | CELEBRATION | 15.000 | 1.0 | 2015-06-30 | Quito |
| 1619439 | 47 | PRODUCE | 4706.284 | 1.0 | 2015-06-30 | Quito |
| 1500143 | 5 | PREPARED FOODS | 102.720 | 1.0 | 2015-04-24 | Santo Domingo |
611329 rows × 6 columns
sample_2 = merged_data[merged_data['onpromotion'] ==0].sort_values('onpromotion', ascending = False)
sample_2
| store_nbr | family | sales | onpromotion | Sales_date | City | |
|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.000 | 0.0 | 2013-01-01 | Quito |
| 1755395 | 12 | PET SUPPLIES | 0.000 | 0.0 | 2015-09-15 | Latacunga |
| 1755374 | 12 | BREAD/BAKERY | 181.000 | 0.0 | 2015-09-15 | Latacunga |
| 1755375 | 12 | CELEBRATION | 3.000 | 0.0 | 2015-09-15 | Latacunga |
| 1755377 | 12 | DAIRY | 310.000 | 0.0 | 2015-09-15 | Latacunga |
| ... | ... | ... | ... | ... | ... | ... |
| 798896 | 24 | SEAFOOD | 50.756 | 0.0 | 2014-03-26 | Guayaquil |
| 798897 | 25 | AUTOMOTIVE | 3.000 | 0.0 | 2014-03-26 | Salinas |
| 798898 | 25 | BABY CARE | 0.000 | 0.0 | 2014-03-26 | Salinas |
| 798899 | 25 | BEAUTY | 4.000 | 0.0 | 2014-03-26 | Salinas |
| 7127 | 9 | SEAFOOD | 0.000 | 0.0 | 2015-12-25 | Quito |
2396687 rows × 6 columns
c = sample_2['City'].value_counts()[:10].index
d = sample_2['City'].value_counts()[:10].values
plt.bar(c,d)
plt.rcParams['figure.figsize'] = (25,10)
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.title(' NO PROMOTION ', fontsize = 19, fontweight = 'bold')
plt.xlabel('Cities', fontsize = 19, fontweight = 'bold')
plt.ylabel('Sales', fontsize = 19, fontweight = 'bold')
Text(0, 0.5, 'Sales')
# Viewing oil dataset
oil.head()
| date | dcoilwtico | |
|---|---|---|
| 0 | 2013-01-01 | 93.14 |
| 1 | 2013-01-02 | 93.14 |
| 2 | 2013-01-03 | 92.97 |
| 3 | 2013-01-04 | 93.12 |
| 4 | 2013-01-07 | 93.20 |
# Changing oil date to datetime
oil['Sales_date'] = pd.to_datetime(oil['date']).dt.date
# checking for the completeness of dates for oil prices.
date_difference = oil['Sales_date'].max() - oil['Sales_date'].min()
date_difference
datetime.timedelta(days=1703)
date_difference.days + 1
1704
# checking the unique number of days
oil.Sales_date.nunique()
1218
#checking the range of dates
oil['Sales_date'].min(), oil['Sales_date'].max()
(datetime.date(2013, 1, 1), datetime.date(2017, 8, 31))
# checking for the expected days
dates_expected = pd.date_range(start= oil['Sales_date'].min(), end = oil['Sales_date'].max())
dates_expected
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
'2013-01-09', '2013-01-10',
...
'2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25',
'2017-08-26', '2017-08-27', '2017-08-28', '2017-08-29',
'2017-08-30', '2017-08-31'],
dtype='datetime64[ns]', length=1704, freq='D')
# Finding the missing dates in both sets
dates_missing = set(dates_expected.date) - set(oil.Sales_date.unique())
dates_missing
{datetime.date(2013, 1, 5),
datetime.date(2013, 1, 6),
datetime.date(2013, 1, 12),
datetime.date(2013, 1, 13),
datetime.date(2013, 1, 19),
datetime.date(2013, 1, 20),
datetime.date(2013, 1, 26),
datetime.date(2013, 1, 27),
datetime.date(2013, 2, 2),
datetime.date(2013, 2, 3),
datetime.date(2013, 2, 9),
datetime.date(2013, 2, 10),
datetime.date(2013, 2, 16),
datetime.date(2013, 2, 17),
datetime.date(2013, 2, 23),
datetime.date(2013, 2, 24),
datetime.date(2013, 3, 2),
datetime.date(2013, 3, 3),
datetime.date(2013, 3, 9),
datetime.date(2013, 3, 10),
datetime.date(2013, 3, 16),
datetime.date(2013, 3, 17),
datetime.date(2013, 3, 23),
datetime.date(2013, 3, 24),
datetime.date(2013, 3, 30),
datetime.date(2013, 3, 31),
datetime.date(2013, 4, 6),
datetime.date(2013, 4, 7),
datetime.date(2013, 4, 13),
datetime.date(2013, 4, 14),
datetime.date(2013, 4, 20),
datetime.date(2013, 4, 21),
datetime.date(2013, 4, 27),
datetime.date(2013, 4, 28),
datetime.date(2013, 5, 4),
datetime.date(2013, 5, 5),
datetime.date(2013, 5, 11),
datetime.date(2013, 5, 12),
datetime.date(2013, 5, 18),
datetime.date(2013, 5, 19),
datetime.date(2013, 5, 25),
datetime.date(2013, 5, 26),
datetime.date(2013, 6, 1),
datetime.date(2013, 6, 2),
datetime.date(2013, 6, 8),
datetime.date(2013, 6, 9),
datetime.date(2013, 6, 15),
datetime.date(2013, 6, 16),
datetime.date(2013, 6, 22),
datetime.date(2013, 6, 23),
datetime.date(2013, 6, 29),
datetime.date(2013, 6, 30),
datetime.date(2013, 7, 6),
datetime.date(2013, 7, 7),
datetime.date(2013, 7, 13),
datetime.date(2013, 7, 14),
datetime.date(2013, 7, 20),
datetime.date(2013, 7, 21),
datetime.date(2013, 7, 27),
datetime.date(2013, 7, 28),
datetime.date(2013, 8, 3),
datetime.date(2013, 8, 4),
datetime.date(2013, 8, 10),
datetime.date(2013, 8, 11),
datetime.date(2013, 8, 17),
datetime.date(2013, 8, 18),
datetime.date(2013, 8, 24),
datetime.date(2013, 8, 25),
datetime.date(2013, 8, 31),
datetime.date(2013, 9, 1),
datetime.date(2013, 9, 7),
datetime.date(2013, 9, 8),
datetime.date(2013, 9, 14),
datetime.date(2013, 9, 15),
datetime.date(2013, 9, 21),
datetime.date(2013, 9, 22),
datetime.date(2013, 9, 28),
datetime.date(2013, 9, 29),
datetime.date(2013, 10, 5),
datetime.date(2013, 10, 6),
datetime.date(2013, 10, 12),
datetime.date(2013, 10, 13),
datetime.date(2013, 10, 19),
datetime.date(2013, 10, 20),
datetime.date(2013, 10, 26),
datetime.date(2013, 10, 27),
datetime.date(2013, 11, 2),
datetime.date(2013, 11, 3),
datetime.date(2013, 11, 9),
datetime.date(2013, 11, 10),
datetime.date(2013, 11, 16),
datetime.date(2013, 11, 17),
datetime.date(2013, 11, 23),
datetime.date(2013, 11, 24),
datetime.date(2013, 11, 30),
datetime.date(2013, 12, 1),
datetime.date(2013, 12, 7),
datetime.date(2013, 12, 8),
datetime.date(2013, 12, 14),
datetime.date(2013, 12, 15),
datetime.date(2013, 12, 21),
datetime.date(2013, 12, 22),
datetime.date(2013, 12, 28),
datetime.date(2013, 12, 29),
datetime.date(2014, 1, 4),
datetime.date(2014, 1, 5),
datetime.date(2014, 1, 11),
datetime.date(2014, 1, 12),
datetime.date(2014, 1, 18),
datetime.date(2014, 1, 19),
datetime.date(2014, 1, 25),
datetime.date(2014, 1, 26),
datetime.date(2014, 2, 1),
datetime.date(2014, 2, 2),
datetime.date(2014, 2, 8),
datetime.date(2014, 2, 9),
datetime.date(2014, 2, 15),
datetime.date(2014, 2, 16),
datetime.date(2014, 2, 22),
datetime.date(2014, 2, 23),
datetime.date(2014, 3, 1),
datetime.date(2014, 3, 2),
datetime.date(2014, 3, 8),
datetime.date(2014, 3, 9),
datetime.date(2014, 3, 15),
datetime.date(2014, 3, 16),
datetime.date(2014, 3, 22),
datetime.date(2014, 3, 23),
datetime.date(2014, 3, 29),
datetime.date(2014, 3, 30),
datetime.date(2014, 4, 5),
datetime.date(2014, 4, 6),
datetime.date(2014, 4, 12),
datetime.date(2014, 4, 13),
datetime.date(2014, 4, 19),
datetime.date(2014, 4, 20),
datetime.date(2014, 4, 26),
datetime.date(2014, 4, 27),
datetime.date(2014, 5, 3),
datetime.date(2014, 5, 4),
datetime.date(2014, 5, 10),
datetime.date(2014, 5, 11),
datetime.date(2014, 5, 17),
datetime.date(2014, 5, 18),
datetime.date(2014, 5, 24),
datetime.date(2014, 5, 25),
datetime.date(2014, 5, 31),
datetime.date(2014, 6, 1),
datetime.date(2014, 6, 7),
datetime.date(2014, 6, 8),
datetime.date(2014, 6, 14),
datetime.date(2014, 6, 15),
datetime.date(2014, 6, 21),
datetime.date(2014, 6, 22),
datetime.date(2014, 6, 28),
datetime.date(2014, 6, 29),
datetime.date(2014, 7, 5),
datetime.date(2014, 7, 6),
datetime.date(2014, 7, 12),
datetime.date(2014, 7, 13),
datetime.date(2014, 7, 19),
datetime.date(2014, 7, 20),
datetime.date(2014, 7, 26),
datetime.date(2014, 7, 27),
datetime.date(2014, 8, 2),
datetime.date(2014, 8, 3),
datetime.date(2014, 8, 9),
datetime.date(2014, 8, 10),
datetime.date(2014, 8, 16),
datetime.date(2014, 8, 17),
datetime.date(2014, 8, 23),
datetime.date(2014, 8, 24),
datetime.date(2014, 8, 30),
datetime.date(2014, 8, 31),
datetime.date(2014, 9, 6),
datetime.date(2014, 9, 7),
datetime.date(2014, 9, 13),
datetime.date(2014, 9, 14),
datetime.date(2014, 9, 20),
datetime.date(2014, 9, 21),
datetime.date(2014, 9, 27),
datetime.date(2014, 9, 28),
datetime.date(2014, 10, 4),
datetime.date(2014, 10, 5),
datetime.date(2014, 10, 11),
datetime.date(2014, 10, 12),
datetime.date(2014, 10, 18),
datetime.date(2014, 10, 19),
datetime.date(2014, 10, 25),
datetime.date(2014, 10, 26),
datetime.date(2014, 11, 1),
datetime.date(2014, 11, 2),
datetime.date(2014, 11, 8),
datetime.date(2014, 11, 9),
datetime.date(2014, 11, 15),
datetime.date(2014, 11, 16),
datetime.date(2014, 11, 22),
datetime.date(2014, 11, 23),
datetime.date(2014, 11, 29),
datetime.date(2014, 11, 30),
datetime.date(2014, 12, 6),
datetime.date(2014, 12, 7),
datetime.date(2014, 12, 13),
datetime.date(2014, 12, 14),
datetime.date(2014, 12, 20),
datetime.date(2014, 12, 21),
datetime.date(2014, 12, 27),
datetime.date(2014, 12, 28),
datetime.date(2015, 1, 3),
datetime.date(2015, 1, 4),
datetime.date(2015, 1, 10),
datetime.date(2015, 1, 11),
datetime.date(2015, 1, 17),
datetime.date(2015, 1, 18),
datetime.date(2015, 1, 24),
datetime.date(2015, 1, 25),
datetime.date(2015, 1, 31),
datetime.date(2015, 2, 1),
datetime.date(2015, 2, 7),
datetime.date(2015, 2, 8),
datetime.date(2015, 2, 14),
datetime.date(2015, 2, 15),
datetime.date(2015, 2, 21),
datetime.date(2015, 2, 22),
datetime.date(2015, 2, 28),
datetime.date(2015, 3, 1),
datetime.date(2015, 3, 7),
datetime.date(2015, 3, 8),
datetime.date(2015, 3, 14),
datetime.date(2015, 3, 15),
datetime.date(2015, 3, 21),
datetime.date(2015, 3, 22),
datetime.date(2015, 3, 28),
datetime.date(2015, 3, 29),
datetime.date(2015, 4, 4),
datetime.date(2015, 4, 5),
datetime.date(2015, 4, 11),
datetime.date(2015, 4, 12),
datetime.date(2015, 4, 18),
datetime.date(2015, 4, 19),
datetime.date(2015, 4, 25),
datetime.date(2015, 4, 26),
datetime.date(2015, 5, 2),
datetime.date(2015, 5, 3),
datetime.date(2015, 5, 9),
datetime.date(2015, 5, 10),
datetime.date(2015, 5, 16),
datetime.date(2015, 5, 17),
datetime.date(2015, 5, 23),
datetime.date(2015, 5, 24),
datetime.date(2015, 5, 30),
datetime.date(2015, 5, 31),
datetime.date(2015, 6, 6),
datetime.date(2015, 6, 7),
datetime.date(2015, 6, 13),
datetime.date(2015, 6, 14),
datetime.date(2015, 6, 20),
datetime.date(2015, 6, 21),
datetime.date(2015, 6, 27),
datetime.date(2015, 6, 28),
datetime.date(2015, 7, 4),
datetime.date(2015, 7, 5),
datetime.date(2015, 7, 11),
datetime.date(2015, 7, 12),
datetime.date(2015, 7, 18),
datetime.date(2015, 7, 19),
datetime.date(2015, 7, 25),
datetime.date(2015, 7, 26),
datetime.date(2015, 8, 1),
datetime.date(2015, 8, 2),
datetime.date(2015, 8, 8),
datetime.date(2015, 8, 9),
datetime.date(2015, 8, 15),
datetime.date(2015, 8, 16),
datetime.date(2015, 8, 22),
datetime.date(2015, 8, 23),
datetime.date(2015, 8, 29),
datetime.date(2015, 8, 30),
datetime.date(2015, 9, 5),
datetime.date(2015, 9, 6),
datetime.date(2015, 9, 12),
datetime.date(2015, 9, 13),
datetime.date(2015, 9, 19),
datetime.date(2015, 9, 20),
datetime.date(2015, 9, 26),
datetime.date(2015, 9, 27),
datetime.date(2015, 10, 3),
datetime.date(2015, 10, 4),
datetime.date(2015, 10, 10),
datetime.date(2015, 10, 11),
datetime.date(2015, 10, 17),
datetime.date(2015, 10, 18),
datetime.date(2015, 10, 24),
datetime.date(2015, 10, 25),
datetime.date(2015, 10, 31),
datetime.date(2015, 11, 1),
datetime.date(2015, 11, 7),
datetime.date(2015, 11, 8),
datetime.date(2015, 11, 14),
datetime.date(2015, 11, 15),
datetime.date(2015, 11, 21),
datetime.date(2015, 11, 22),
datetime.date(2015, 11, 28),
datetime.date(2015, 11, 29),
datetime.date(2015, 12, 5),
datetime.date(2015, 12, 6),
datetime.date(2015, 12, 12),
datetime.date(2015, 12, 13),
datetime.date(2015, 12, 19),
datetime.date(2015, 12, 20),
datetime.date(2015, 12, 26),
datetime.date(2015, 12, 27),
datetime.date(2016, 1, 2),
datetime.date(2016, 1, 3),
datetime.date(2016, 1, 9),
datetime.date(2016, 1, 10),
datetime.date(2016, 1, 16),
datetime.date(2016, 1, 17),
datetime.date(2016, 1, 23),
datetime.date(2016, 1, 24),
datetime.date(2016, 1, 30),
datetime.date(2016, 1, 31),
datetime.date(2016, 2, 6),
datetime.date(2016, 2, 7),
datetime.date(2016, 2, 13),
datetime.date(2016, 2, 14),
datetime.date(2016, 2, 20),
datetime.date(2016, 2, 21),
datetime.date(2016, 2, 27),
datetime.date(2016, 2, 28),
datetime.date(2016, 3, 5),
datetime.date(2016, 3, 6),
datetime.date(2016, 3, 12),
datetime.date(2016, 3, 13),
datetime.date(2016, 3, 19),
datetime.date(2016, 3, 20),
datetime.date(2016, 3, 26),
datetime.date(2016, 3, 27),
datetime.date(2016, 4, 2),
datetime.date(2016, 4, 3),
datetime.date(2016, 4, 9),
datetime.date(2016, 4, 10),
datetime.date(2016, 4, 16),
datetime.date(2016, 4, 17),
datetime.date(2016, 4, 23),
datetime.date(2016, 4, 24),
datetime.date(2016, 4, 30),
datetime.date(2016, 5, 1),
datetime.date(2016, 5, 7),
datetime.date(2016, 5, 8),
datetime.date(2016, 5, 14),
datetime.date(2016, 5, 15),
datetime.date(2016, 5, 21),
datetime.date(2016, 5, 22),
datetime.date(2016, 5, 28),
datetime.date(2016, 5, 29),
datetime.date(2016, 6, 4),
datetime.date(2016, 6, 5),
datetime.date(2016, 6, 11),
datetime.date(2016, 6, 12),
datetime.date(2016, 6, 18),
datetime.date(2016, 6, 19),
datetime.date(2016, 6, 25),
datetime.date(2016, 6, 26),
datetime.date(2016, 7, 2),
datetime.date(2016, 7, 3),
datetime.date(2016, 7, 9),
datetime.date(2016, 7, 10),
datetime.date(2016, 7, 16),
datetime.date(2016, 7, 17),
datetime.date(2016, 7, 23),
datetime.date(2016, 7, 24),
datetime.date(2016, 7, 30),
datetime.date(2016, 7, 31),
datetime.date(2016, 8, 6),
datetime.date(2016, 8, 7),
datetime.date(2016, 8, 13),
datetime.date(2016, 8, 14),
datetime.date(2016, 8, 20),
datetime.date(2016, 8, 21),
datetime.date(2016, 8, 27),
datetime.date(2016, 8, 28),
datetime.date(2016, 9, 3),
datetime.date(2016, 9, 4),
datetime.date(2016, 9, 10),
datetime.date(2016, 9, 11),
datetime.date(2016, 9, 17),
datetime.date(2016, 9, 18),
datetime.date(2016, 9, 24),
datetime.date(2016, 9, 25),
datetime.date(2016, 10, 1),
datetime.date(2016, 10, 2),
datetime.date(2016, 10, 8),
datetime.date(2016, 10, 9),
datetime.date(2016, 10, 15),
datetime.date(2016, 10, 16),
datetime.date(2016, 10, 22),
datetime.date(2016, 10, 23),
datetime.date(2016, 10, 29),
datetime.date(2016, 10, 30),
datetime.date(2016, 11, 5),
datetime.date(2016, 11, 6),
datetime.date(2016, 11, 12),
datetime.date(2016, 11, 13),
datetime.date(2016, 11, 19),
datetime.date(2016, 11, 20),
datetime.date(2016, 11, 26),
datetime.date(2016, 11, 27),
datetime.date(2016, 12, 3),
datetime.date(2016, 12, 4),
datetime.date(2016, 12, 10),
datetime.date(2016, 12, 11),
datetime.date(2016, 12, 17),
datetime.date(2016, 12, 18),
datetime.date(2016, 12, 24),
datetime.date(2016, 12, 25),
datetime.date(2016, 12, 31),
datetime.date(2017, 1, 1),
datetime.date(2017, 1, 7),
datetime.date(2017, 1, 8),
datetime.date(2017, 1, 14),
datetime.date(2017, 1, 15),
datetime.date(2017, 1, 21),
datetime.date(2017, 1, 22),
datetime.date(2017, 1, 28),
datetime.date(2017, 1, 29),
datetime.date(2017, 2, 4),
datetime.date(2017, 2, 5),
datetime.date(2017, 2, 11),
datetime.date(2017, 2, 12),
datetime.date(2017, 2, 18),
datetime.date(2017, 2, 19),
datetime.date(2017, 2, 25),
datetime.date(2017, 2, 26),
datetime.date(2017, 3, 4),
datetime.date(2017, 3, 5),
datetime.date(2017, 3, 11),
datetime.date(2017, 3, 12),
datetime.date(2017, 3, 18),
datetime.date(2017, 3, 19),
datetime.date(2017, 3, 25),
datetime.date(2017, 3, 26),
datetime.date(2017, 4, 1),
datetime.date(2017, 4, 2),
datetime.date(2017, 4, 8),
datetime.date(2017, 4, 9),
datetime.date(2017, 4, 15),
datetime.date(2017, 4, 16),
datetime.date(2017, 4, 22),
datetime.date(2017, 4, 23),
datetime.date(2017, 4, 29),
datetime.date(2017, 4, 30),
datetime.date(2017, 5, 6),
datetime.date(2017, 5, 7),
datetime.date(2017, 5, 13),
datetime.date(2017, 5, 14),
datetime.date(2017, 5, 20),
datetime.date(2017, 5, 21),
datetime.date(2017, 5, 27),
datetime.date(2017, 5, 28),
datetime.date(2017, 6, 3),
datetime.date(2017, 6, 4),
datetime.date(2017, 6, 10),
datetime.date(2017, 6, 11),
datetime.date(2017, 6, 17),
datetime.date(2017, 6, 18),
datetime.date(2017, 6, 24),
datetime.date(2017, 6, 25),
datetime.date(2017, 7, 1),
datetime.date(2017, 7, 2),
datetime.date(2017, 7, 8),
datetime.date(2017, 7, 9),
datetime.date(2017, 7, 15),
datetime.date(2017, 7, 16),
datetime.date(2017, 7, 22),
datetime.date(2017, 7, 23),
datetime.date(2017, 7, 29),
datetime.date(2017, 7, 30),
datetime.date(2017, 8, 5),
datetime.date(2017, 8, 6),
datetime.date(2017, 8, 12),
datetime.date(2017, 8, 13),
datetime.date(2017, 8, 19),
datetime.date(2017, 8, 20),
datetime.date(2017, 8, 26),
datetime.date(2017, 8, 27)}
missing_oil_data = list(product(dates_missing))
# adding the missing oil data to get a new oil data with complete dates
revised_oil_data = pd.DataFrame(missing_oil_data, columns = ['Sales_date'])
revised_oil_data
| Sales_date | |
|---|---|
| 0 | 2015-11-07 |
| 1 | 2013-12-21 |
| 2 | 2016-01-23 |
| 3 | 2013-11-30 |
| 4 | 2017-05-28 |
| ... | ... |
| 481 | 2016-01-30 |
| 482 | 2016-01-10 |
| 483 | 2016-07-16 |
| 484 | 2017-06-17 |
| 485 | 2013-11-17 |
486 rows × 1 columns
# Merging oil and revised data
Merged_oil_data = pd.concat([oil, revised_oil_data], ignore_index=False)
Merged_oil_data
| date | dcoilwtico | Sales_date | |
|---|---|---|---|
| 0 | 2013-01-01 | 93.14 | 2013-01-01 |
| 1 | 2013-01-02 | 93.14 | 2013-01-02 |
| 2 | 2013-01-03 | 92.97 | 2013-01-03 |
| 3 | 2013-01-04 | 93.12 | 2013-01-04 |
| 4 | 2013-01-07 | 93.20 | 2013-01-07 |
| ... | ... | ... | ... |
| 481 | NaN | NaN | 2016-01-30 |
| 482 | NaN | NaN | 2016-01-10 |
| 483 | NaN | NaN | 2016-07-16 |
| 484 | NaN | NaN | 2017-06-17 |
| 485 | NaN | NaN | 2013-11-17 |
1704 rows × 3 columns
# Filling in missing values in Merged_oil_data
Merged_oil_data = Merged_oil_data.ffill().bfill()
Merged_oil_data
| date | dcoilwtico | Sales_date | |
|---|---|---|---|
| 0 | 2013-01-01 | 93.14 | 2013-01-01 |
| 1 | 2013-01-02 | 93.14 | 2013-01-02 |
| 2 | 2013-01-03 | 92.97 | 2013-01-03 |
| 3 | 2013-01-04 | 93.12 | 2013-01-04 |
| 4 | 2013-01-07 | 93.20 | 2013-01-07 |
| ... | ... | ... | ... |
| 481 | 2017-08-31 | 47.26 | 2016-01-30 |
| 482 | 2017-08-31 | 47.26 | 2016-01-10 |
| 483 | 2017-08-31 | 47.26 | 2016-07-16 |
| 484 | 2017-08-31 | 47.26 | 2017-06-17 |
| 485 | 2017-08-31 | 47.26 | 2013-11-17 |
1704 rows × 3 columns
# Dropping the date column in Merged_oil_data
Merged_oil_data.drop(columns = ["date"], inplace = True)
# Previewing merged oil data
Merged_oil_data
| dcoilwtico | Sales_date | |
|---|---|---|
| 0 | 93.14 | 2013-01-01 |
| 1 | 93.14 | 2013-01-02 |
| 2 | 92.97 | 2013-01-03 |
| 3 | 93.12 | 2013-01-04 |
| 4 | 93.20 | 2013-01-07 |
| ... | ... | ... |
| 481 | 47.26 | 2016-01-30 |
| 482 | 47.26 | 2016-01-10 |
| 483 | 47.26 | 2016-07-16 |
| 484 | 47.26 | 2017-06-17 |
| 485 | 47.26 | 2013-11-17 |
1704 rows × 2 columns
# Concatenating Merged_oil_data and merged data to analyse the effect of oil prices on sales.
Merged_oil_data2 = merged_data.merge(Merged_oil_data, how='inner', on='Sales_date')
Merged_oil_data2
| store_nbr | family | sales | onpromotion | Sales_date | City | dcoilwtico | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito | 93.14 |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito | 93.14 |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito | 93.14 |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito | 93.14 |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito | 93.14 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 3008011 | 9 | POULTRY | 0.0 | 0.0 | 2015-12-25 | Quito | 37.62 |
| 3008012 | 9 | PREPARED FOODS | 0.0 | 0.0 | 2015-12-25 | Quito | 37.62 |
| 3008013 | 9 | PRODUCE | 0.0 | 0.0 | 2015-12-25 | Quito | 37.62 |
| 3008014 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.0 | 0.0 | 2015-12-25 | Quito | 37.62 |
| 3008015 | 9 | SEAFOOD | 0.0 | 0.0 | 2015-12-25 | Quito | 37.62 |
3008016 rows × 7 columns
# Checking for null values in merged data.
Merged_oil_data2.isnull().sum()
store_nbr 0 family 0 sales 0 onpromotion 0 Sales_date 0 City 0 dcoilwtico 0 dtype: int64
Sp = Merged_oil_data2.plot.scatter(x="dcoilwtico", y="sales", c='green')
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.xlabel('dcoilwtico', fontsize = 25, fontweight = 'bold')
plt.ylabel('sales', fontsize = 25, fontweight = 'bold')
Text(0, 0.5, 'sales')
# adding a column called PROMO/NOPROMO to the merged data set
merged_data['PROMO/NOPROMO']='NaN'
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito | NaN |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito | NaN |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito | NaN |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito | NaN |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.0 | 0.0 | 2015-12-25 | Quito | NaN |
| 7124 | 9 | PREPARED FOODS | 0.0 | 0.0 | 2015-12-25 | Quito | NaN |
| 7125 | 9 | PRODUCE | 0.0 | 0.0 | 2015-12-25 | Quito | NaN |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.0 | 0.0 | 2015-12-25 | Quito | NaN |
| 7127 | 9 | SEAFOOD | 0.0 | 0.0 | 2015-12-25 | Quito | NaN |
3008016 rows × 7 columns
# mapping the merged data set to split the Promo/NoPromo column
merged_data["PROMO/NOPROMO"] = merged_data['onpromotion'].map(
{0:'No Promo'})
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7124 | 9 | PREPARED FOODS | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7125 | 9 | PRODUCE | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7127 | 9 | SEAFOOD | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
3008016 rows × 7 columns
# Replacing null values in the Promo/No Promo column with Promo
merged_data["PROMO/NOPROMO"].replace(np.NAN ,value='Promo', inplace=True)
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7124 | 9 | PREPARED FOODS | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7125 | 9 | PRODUCE | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7127 | 9 | SEAFOOD | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
3008016 rows × 7 columns
sales_onpromo = merged_data.groupby(by = 'PROMO/NOPROMO').sales.agg(
["sum"]).sort_values(by = ["sum"], ascending = False)
sales_onpromo
| sum | |
|---|---|
| PROMO/NOPROMO | |
| Promo | 6.955052e+08 |
| No Promo | 3.781398e+08 |
sales_onpromo.plot(kind ='bar')
plt.xticks(fontsize = 17)
plt.yticks(fontsize = 17)
plt.ylabel("SALES",fontsize = 25,fontweight = 'bold')
plt.xlabel("PROMO and NO PROMO",fontsize = 20,fontweight = 'bold')
plt.legend(bbox_to_anchor =(1,1),fontsize = 15)
plt.title("SALES WITH AND WITHOUT PROMO",fontsize = 15,fontweight = 'bold')
Text(0.5, 1.0, 'SALES WITH AND WITHOUT PROMO')
train_set.family.unique()
array(['AUTOMOTIVE', 'BABY CARE', 'BEAUTY', 'BEVERAGES', 'BOOKS',
'BREAD/BAKERY', 'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS',
'FROZEN FOODS', 'GROCERY I', 'GROCERY II', 'HARDWARE',
'HOME AND KITCHEN I', 'HOME AND KITCHEN II', 'HOME APPLIANCES',
'HOME CARE', 'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE',
'LIQUOR,WINE,BEER', 'MAGAZINES', 'MEATS', 'PERSONAL CARE',
'PET SUPPLIES', 'PLAYERS AND ELECTRONICS', 'POULTRY',
'PREPARED FOODS', 'PRODUCE', 'SCHOOL AND OFFICE SUPPLIES',
'SEAFOOD'], dtype=object)
# Viewing the merged dataset
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 1 | 1 | BABY CARE | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 2 | 1 | BEAUTY | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 3 | 1 | BEVERAGES | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| 4 | 1 | BOOKS | 0.0 | 0.0 | 2013-01-01 | Quito | No Promo |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7124 | 9 | PREPARED FOODS | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7125 | 9 | PRODUCE | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
| 7127 | 9 | SEAFOOD | 0.0 | 0.0 | 2015-12-25 | Quito | No Promo |
3008016 rows × 7 columns
# sorting the family column from highest to lowest
group_by_family = merged_data.groupby(by = "family").sales.agg(["sum"]).sort_values(by = ["sum"],ascending = False)
pd.set_option('display.float_format', lambda x: '%.2f' % x)
group_by_family
| sum | |
|---|---|
| family | |
| GROCERY I | 343462734.89 |
| BEVERAGES | 216954486.00 |
| PRODUCE | 122704684.68 |
| CLEANING | 97521289.00 |
| DAIRY | 64487709.00 |
| BREAD/BAKERY | 42133945.58 |
| POULTRY | 31876004.47 |
| MEATS | 31086468.40 |
| PERSONAL CARE | 24592051.00 |
| DELI | 24110322.47 |
| HOME CARE | 16022744.00 |
| EGGS | 15588296.00 |
| FROZEN FOODS | 14073887.72 |
| PREPARED FOODS | 8799895.12 |
| LIQUOR,WINE,BEER | 7746640.00 |
| SEAFOOD | 2015431.88 |
| GROCERY II | 1962767.00 |
| HOME AND KITCHEN I | 1861491.00 |
| HOME AND KITCHEN II | 1520670.00 |
| CELEBRATION | 761177.00 |
| LINGERIE | 653114.00 |
| LADIESWEAR | 651159.00 |
| PLAYERS AND ELECTRONICS | 562608.00 |
| AUTOMOTIVE | 554822.00 |
| LAWN AND GARDEN | 548842.00 |
| PET SUPPLIES | 356584.00 |
| BEAUTY | 337893.00 |
| SCHOOL AND OFFICE SUPPLIES | 269316.00 |
| MAGAZINES | 266359.00 |
| HARDWARE | 103470.00 |
| HOME APPLIANCES | 41601.00 |
| BABY CARE | 10051.00 |
| BOOKS | 6438.00 |
plt.figure(figsize = (20,15))
plt.xticks(fontsize = 20)
plt.yticks(fontsize = 20)
sns.barplot(y = group_by_family[:10].index, x = (group_by_family["sum"])[:10])
plt.ylabel("Items sold",fontsize = 40,fontweight = 'bold')
plt.xlabel("Sales",fontsize = 25,fontweight = 'bold')
plt.title("ITEMS WITH HIGHEST AND LOWEST SALES",fontsize = 40,fontweight = 'bold')
Text(0.5, 1.0, 'ITEMS WITH HIGHEST AND LOWEST SALES')
# Previewing merged data set
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 1 | 1 | BABY CARE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 2 | 1 | BEAUTY | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 3 | 1 | BEVERAGES | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 4 | 1 | BOOKS | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7124 | 9 | PREPARED FOODS | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7125 | 9 | PRODUCE | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7127 | 9 | SEAFOOD | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
3008016 rows × 7 columns
# Sorting sales values from highest to lowest
sample_3 = merged_data[merged_data['sales'] > 1].sort_values('sales', ascending = False)
sample_3
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 2163723 | 2 | GROCERY I | 124717.00 | 59.00 | 2016-05-02 | Quito | Promo |
| 2445984 | 39 | MEATS | 89576.36 | 0.00 | 2016-10-07 | Cuenca | No Promo |
| 2144154 | 20 | GROCERY I | 87438.52 | 53.00 | 2016-04-21 | Quito | Promo |
| 2139699 | 45 | GROCERY I | 76090.00 | 38.00 | 2016-04-18 | Quito | Promo |
| 2153031 | 2 | GROCERY I | 63434.00 | 30.00 | 2016-04-26 | Quito | Promo |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1939442 | 26 | SEAFOOD | 1.06 | 0.00 | 2015-12-28 | Guayaquil | No Promo |
| 465794 | 28 | SEAFOOD | 1.05 | 0.00 | 2013-09-19 | Guayaquil | No Promo |
| 406295 | 9 | SEAFOOD | 1.04 | 0.00 | 2013-08-16 | Quito | No Promo |
| 2550668 | 26 | SEAFOOD | 1.02 | 0.00 | 2016-12-05 | Guayaquil | No Promo |
| 1693526 | 26 | SEAFOOD | 1.01 | 0.00 | 2015-08-11 | Guayaquil | No Promo |
1946315 rows × 7 columns
merged_data['Sales_date'] = pd.to_datetime(merged_data['Sales_date']).dt.date
# Finding the aggregated sales per date
aggregated_sales_perdate = sample_3.groupby('Sales_date', as_index=False)['sales'].sum()
aggregated_sales_perdate
| Sales_date | sales | |
|---|---|---|
| 0 | 2013-01-01 | 2511.62 |
| 1 | 2013-01-02 | 496059.42 |
| 2 | 2013-01-03 | 361413.23 |
| 3 | 2013-01-04 | 354414.68 |
| 4 | 2013-01-05 | 477309.12 |
| ... | ... | ... |
| 1679 | 2017-08-11 | 826295.72 |
| 1680 | 2017-08-12 | 792577.54 |
| 1681 | 2017-08-13 | 865568.68 |
| 1682 | 2017-08-14 | 760854.41 |
| 1683 | 2017-08-15 | 762598.94 |
1684 rows × 2 columns
# Viewing aggregated sales data
aggregated_sales_perdate
| Sales_date | sales | |
|---|---|---|
| 0 | 2013-01-01 | 2511.62 |
| 1 | 2013-01-02 | 496059.42 |
| 2 | 2013-01-03 | 361413.23 |
| 3 | 2013-01-04 | 354414.68 |
| 4 | 2013-01-05 | 477309.12 |
| ... | ... | ... |
| 1679 | 2017-08-11 | 826295.72 |
| 1680 | 2017-08-12 | 792577.54 |
| 1681 | 2017-08-13 | 865568.68 |
| 1682 | 2017-08-14 | 760854.41 |
| 1683 | 2017-08-15 | 762598.94 |
1684 rows × 2 columns
# Adding year, month, week and day columns to the Sample dataset
sample_3['year'] = pd.to_datetime(sample_3['Sales_date']).dt.year
sample_3['month'] = pd.to_datetime(sample_3['Sales_date']).dt.month
sample_3['week'] = pd.to_datetime(sample_3['Sales_date']).dt.week
sample_3['day'] = pd.to_datetime(sample_3['Sales_date']).dt.day
warnings.filterwarnings('ignore')
/var/folders/xr/z3nw6rbs7k5g_8qmkl696c4w0000gp/T/ipykernel_67788/4157373246.py:4: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead. sample_3['week'] = pd.to_datetime(sample_3['Sales_date']).dt.week
# Viewing the sample_3 dataset
sample_3
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | year | month | week | day | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2163723 | 2 | GROCERY I | 124717.00 | 59.00 | 2016-05-02 | Quito | Promo | 2016 | 5 | 18 | 2 |
| 2445984 | 39 | MEATS | 89576.36 | 0.00 | 2016-10-07 | Cuenca | No Promo | 2016 | 10 | 40 | 7 |
| 2144154 | 20 | GROCERY I | 87438.52 | 53.00 | 2016-04-21 | Quito | Promo | 2016 | 4 | 16 | 21 |
| 2139699 | 45 | GROCERY I | 76090.00 | 38.00 | 2016-04-18 | Quito | Promo | 2016 | 4 | 16 | 18 |
| 2153031 | 2 | GROCERY I | 63434.00 | 30.00 | 2016-04-26 | Quito | Promo | 2016 | 4 | 17 | 26 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1939442 | 26 | SEAFOOD | 1.06 | 0.00 | 2015-12-28 | Guayaquil | No Promo | 2015 | 12 | 53 | 28 |
| 465794 | 28 | SEAFOOD | 1.05 | 0.00 | 2013-09-19 | Guayaquil | No Promo | 2013 | 9 | 38 | 19 |
| 406295 | 9 | SEAFOOD | 1.04 | 0.00 | 2013-08-16 | Quito | No Promo | 2013 | 8 | 33 | 16 |
| 2550668 | 26 | SEAFOOD | 1.02 | 0.00 | 2016-12-05 | Guayaquil | No Promo | 2016 | 12 | 49 | 5 |
| 1693526 | 26 | SEAFOOD | 1.01 | 0.00 | 2015-08-11 | Guayaquil | No Promo | 2015 | 8 | 33 | 11 |
1946315 rows × 11 columns
#Adding years column to the aggregated_sales_perdate column
aggregated_sales_perdate['years'] = pd.to_datetime(aggregated_sales_perdate['Sales_date']).dt.year
aggregated_sales_perdate.groupby(['years'], as_index=False)['sales'].max()
aggregated_sales_perdate
| Sales_date | sales | years | |
|---|---|---|---|
| 0 | 2013-01-01 | 2511.62 | 2013 |
| 1 | 2013-01-02 | 496059.42 | 2013 |
| 2 | 2013-01-03 | 361413.23 | 2013 |
| 3 | 2013-01-04 | 354414.68 | 2013 |
| 4 | 2013-01-05 | 477309.12 | 2013 |
| ... | ... | ... | ... |
| 1679 | 2017-08-11 | 826295.72 | 2017 |
| 1680 | 2017-08-12 | 792577.54 | 2017 |
| 1681 | 2017-08-13 | 865568.68 | 2017 |
| 1682 | 2017-08-14 | 760854.41 | 2017 |
| 1683 | 2017-08-15 | 762598.94 | 2017 |
1684 rows × 3 columns
# Filtering the maximum sales values
Maximum_values = aggregated_sales_perdate.groupby(['years'], as_index=False)['sales'].max()
a = aggregated_sales_perdate.sort_values(by='sales', ascending=False)
b = a.loc[a.groupby("years")["sales"].idxmax()]
for row in Maximum_values.itertuples():
musk = (aggregated_sales_perdate['years'] == row.years) & (aggregated_sales_perdate['sales'] == row.sales)
Maximum_values_row = aggregated_sales_perdate.loc[musk]
#t = tmp_row['sales_date'].values
#s = row.sales
print('Peak sales for', row.years, 'occured on', Maximum_values_row['Sales_date'].values, 'for', row.sales, 'items')
Peak sales for 2013 occured on [datetime.date(2013, 12, 23)] for 792818.284427 items Peak sales for 2014 occured on [datetime.date(2014, 12, 23)] for 1064917.9730725 items Peak sales for 2015 occured on [datetime.date(2015, 10, 4)] for 1234058.938595 items Peak sales for 2016 occured on [datetime.date(2016, 4, 18)] for 1345848.6048929 items Peak sales for 2017 occured on [datetime.date(2017, 4, 1)] for 1463012.962459 items
a
| Sales_date | sales | years | |
|---|---|---|---|
| 1547 | 2017-04-01 | 1463012.96 | 2017 |
| 1458 | 2017-01-02 | 1402224.37 | 2017 |
| 1611 | 2017-06-04 | 1376449.52 | 2017 |
| 1200 | 2016-04-18 | 1345848.60 | 2016 |
| 1577 | 2017-05-01 | 1306634.90 | 2017 |
| ... | ... | ... | ... |
| 1092 | 2016-01-01 | 16432.39 | 2016 |
| 728 | 2015-01-01 | 12771.62 | 2015 |
| 1457 | 2017-01-01 | 12082.50 | 2017 |
| 364 | 2014-01-01 | 8597.07 | 2014 |
| 0 | 2013-01-01 | 2511.62 | 2013 |
1684 rows × 3 columns
b
| Sales_date | sales | years | |
|---|---|---|---|
| 356 | 2013-12-23 | 792818.28 | 2013 |
| 720 | 2014-12-23 | 1064917.97 | 2014 |
| 1004 | 2015-10-04 | 1234058.94 | 2015 |
| 1200 | 2016-04-18 | 1345848.60 | 2016 |
| 1547 | 2017-04-01 | 1463012.96 | 2017 |
b.plot(x = 'Sales_date', y = 'sales', kind = 'bar')
plt.xticks(fontsize = 17)
plt.yticks(fontsize = 17)
plt.ylabel("Sales made",fontsize = 30,fontweight = 'bold')
plt.xlabel("Sales date",fontsize = 30,fontweight = 'bold')
plt.title("HIGHEST SALES DATE",fontsize = 40,fontweight = 'bold')
Text(0.5, 1.0, 'HIGHEST SALES DATE')
e = aggregated_sales_perdate[aggregated_sales_perdate['sales'] !=0].sort_values(by='sales', ascending=True)
e
| Sales_date | sales | years | |
|---|---|---|---|
| 0 | 2013-01-01 | 2511.62 | 2013 |
| 364 | 2014-01-01 | 8597.07 | 2014 |
| 1457 | 2017-01-01 | 12082.50 | 2017 |
| 728 | 2015-01-01 | 12771.62 | 2015 |
| 1092 | 2016-01-01 | 16432.39 | 2016 |
| ... | ... | ... | ... |
| 1577 | 2017-05-01 | 1306634.90 | 2017 |
| 1200 | 2016-04-18 | 1345848.60 | 2016 |
| 1611 | 2017-06-04 | 1376449.52 | 2017 |
| 1458 | 2017-01-02 | 1402224.37 | 2017 |
| 1547 | 2017-04-01 | 1463012.96 | 2017 |
1684 rows × 3 columns
f = e.loc[e.groupby("years")["sales"].idxmin()]
f
| Sales_date | sales | years | |
|---|---|---|---|
| 0 | 2013-01-01 | 2511.62 | 2013 |
| 364 | 2014-01-01 | 8597.07 | 2014 |
| 728 | 2015-01-01 | 12771.62 | 2015 |
| 1092 | 2016-01-01 | 16432.39 | 2016 |
| 1457 | 2017-01-01 | 12082.50 | 2017 |
f.plot(x = 'Sales_date', y = 'sales', kind = 'bar')
plt.xticks(fontsize = 17)
plt.yticks(fontsize = 17)
plt.ylabel("Sales made",fontsize = 30,fontweight = 'bold')
plt.xlabel("Sales date",fontsize = 30,fontweight = 'bold')
plt.title("LOWEST SALES DATE",fontsize = 40,fontweight = 'bold')
Text(0.5, 1.0, 'LOWEST SALES DATE')
# Average sales before earthquake, i.e, 2016-04-01 to 2016-04-15
merged_data['Sales_date'] = pd.to_datetime(merged_data['Sales_date']).dt.date
merged_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3008016 entries, 0 to 7127 Data columns (total 7 columns): # Column Dtype --- ------ ----- 0 store_nbr int64 1 family object 2 sales float64 3 onpromotion float64 4 Sales_date object 5 City object 6 PROMO/NOPROMO object dtypes: float64(2), int64(1), object(4) memory usage: 183.6+ MB
merged_data['Sales_date']= merged_data['Sales_date'].astype('str')
before_earthquake = merged_data[(merged_data['Sales_date']> '2016-04-01') & (merged_data['Sales_date']<= '2016-04-15')]
#finding the average of sales for before earthquake.
avg_before_earthquake = before_earthquake['sales'].mean()
avg_before_earthquake
445.007176512935
# Average sales after earthquake, i.e, 2016-04-17 to 2016-04-30
after_earthquake = merged_data[(merged_data['Sales_date']> '2016-04-17') & (merged_data['Sales_date']<= '2016-04-30')]
#finding the average of sales for before earthquake.
avg_after_earthquake = after_earthquake['sales'].mean()
avg_after_earthquake
511.72250327857483
# Viewing the holiday dataset
holidays.head()
| date | type | locale | locale_name | description | transferred | |
|---|---|---|---|---|---|---|
| 0 | 2012-03-02 | Holiday | Local | Manta | Fundacion de Manta | False |
| 1 | 2012-04-01 | Holiday | Regional | Cotopaxi | Provincializacion de Cotopaxi | False |
| 2 | 2012-04-12 | Holiday | Local | Cuenca | Fundacion de Cuenca | False |
| 3 | 2012-04-14 | Holiday | Local | Libertad | Cantonizacion de Libertad | False |
| 4 | 2012-04-21 | Holiday | Local | Riobamba | Cantonizacion de Riobamba | False |
# Printing a concise summary of the holidays data set
holidays.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 350 entries, 0 to 349 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 350 non-null object 1 type 350 non-null object 2 locale 350 non-null object 3 locale_name 350 non-null object 4 description 350 non-null object 5 transferred 350 non-null bool dtypes: bool(1), object(5) memory usage: 14.1+ KB
holidays['Sales_date'] = pd.to_datetime(holidays['date']).dt.date
holidays
| date | type | locale | locale_name | description | transferred | Sales_date | |
|---|---|---|---|---|---|---|---|
| 0 | 2012-03-02 | Holiday | Local | Manta | Fundacion de Manta | False | 2012-03-02 |
| 1 | 2012-04-01 | Holiday | Regional | Cotopaxi | Provincializacion de Cotopaxi | False | 2012-04-01 |
| 2 | 2012-04-12 | Holiday | Local | Cuenca | Fundacion de Cuenca | False | 2012-04-12 |
| 3 | 2012-04-14 | Holiday | Local | Libertad | Cantonizacion de Libertad | False | 2012-04-14 |
| 4 | 2012-04-21 | Holiday | Local | Riobamba | Cantonizacion de Riobamba | False | 2012-04-21 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 345 | 2017-12-22 | Additional | National | Ecuador | Navidad-3 | False | 2017-12-22 |
| 346 | 2017-12-23 | Additional | National | Ecuador | Navidad-2 | False | 2017-12-23 |
| 347 | 2017-12-24 | Additional | National | Ecuador | Navidad-1 | False | 2017-12-24 |
| 348 | 2017-12-25 | Holiday | National | Ecuador | Navidad | False | 2017-12-25 |
| 349 | 2017-12-26 | Additional | National | Ecuador | Navidad+1 | False | 2017-12-26 |
350 rows × 7 columns
merged_data
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 1 | 1 | BABY CARE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 2 | 1 | BEAUTY | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 3 | 1 | BEVERAGES | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| 4 | 1 | BOOKS | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7123 | 9 | POULTRY | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7124 | 9 | PREPARED FOODS | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7125 | 9 | PRODUCE | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7126 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
| 7127 | 9 | SEAFOOD | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo |
3008016 rows × 7 columns
holidays.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 350 entries, 0 to 349 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 350 non-null object 1 type 350 non-null object 2 locale 350 non-null object 3 locale_name 350 non-null object 4 description 350 non-null object 5 transferred 350 non-null bool 6 Sales_date 350 non-null object dtypes: bool(1), object(6) memory usage: 16.9+ KB
merged_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3008016 entries, 0 to 7127 Data columns (total 7 columns): # Column Dtype --- ------ ----- 0 store_nbr int64 1 family object 2 sales float64 3 onpromotion float64 4 Sales_date object 5 City object 6 PROMO/NOPROMO object dtypes: float64(2), int64(1), object(4) memory usage: 183.6+ MB
# Changing both the merged data and holidays data to datetime
merged_data['Sales_date']= merged_data['Sales_date'].astype('str')
holidays['Sales_date']= holidays['Sales_date'].astype('str')
# Merging the holidays dataset with the Merged data
Merged_holiday_set = merged_data.merge(holidays, how='inner', on='Sales_date')
Merged_holiday_set
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | date | type | locale | locale_name | description | transferred | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | 2013-01-01 | Holiday | National | Ecuador | Primer dia del ano | False |
| 1 | 1 | BABY CARE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | 2013-01-01 | Holiday | National | Ecuador | Primer dia del ano | False |
| 2 | 1 | BEAUTY | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | 2013-01-01 | Holiday | National | Ecuador | Primer dia del ano | False |
| 3 | 1 | BEVERAGES | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | 2013-01-01 | Holiday | National | Ecuador | Primer dia del ano | False |
| 4 | 1 | BOOKS | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | 2013-01-01 | Holiday | National | Ecuador | Primer dia del ano | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 509647 | 9 | POULTRY | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | 2015-12-25 | Holiday | National | Ecuador | Navidad | False |
| 509648 | 9 | PREPARED FOODS | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | 2015-12-25 | Holiday | National | Ecuador | Navidad | False |
| 509649 | 9 | PRODUCE | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | 2015-12-25 | Holiday | National | Ecuador | Navidad | False |
| 509650 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | 2015-12-25 | Holiday | National | Ecuador | Navidad | False |
| 509651 | 9 | SEAFOOD | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | 2015-12-25 | Holiday | National | Ecuador | Navidad | False |
509652 rows × 13 columns
# Dropping the date column
Merged_holiday_set.drop(columns = ["date"], inplace = True)
Merged_holiday_set
| store_nbr | family | sales | onpromotion | Sales_date | City | PROMO/NOPROMO | type | locale | locale_name | description | transferred | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | Holiday | National | Ecuador | Primer dia del ano | False |
| 1 | 1 | BABY CARE | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | Holiday | National | Ecuador | Primer dia del ano | False |
| 2 | 1 | BEAUTY | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | Holiday | National | Ecuador | Primer dia del ano | False |
| 3 | 1 | BEVERAGES | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | Holiday | National | Ecuador | Primer dia del ano | False |
| 4 | 1 | BOOKS | 0.00 | 0.00 | 2013-01-01 | Quito | No Promo | Holiday | National | Ecuador | Primer dia del ano | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 509647 | 9 | POULTRY | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | Holiday | National | Ecuador | Navidad | False |
| 509648 | 9 | PREPARED FOODS | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | Holiday | National | Ecuador | Navidad | False |
| 509649 | 9 | PRODUCE | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | Holiday | National | Ecuador | Navidad | False |
| 509650 | 9 | SCHOOL AND OFFICE SUPPLIES | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | Holiday | National | Ecuador | Navidad | False |
| 509651 | 9 | SEAFOOD | 0.00 | 0.00 | 2015-12-25 | Quito | No Promo | Holiday | National | Ecuador | Navidad | False |
509652 rows × 12 columns
sns.countplot(Merged_holiday_set['locale'])
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.xlabel('Holiday', fontsize = 22, fontweight = 'bold')
plt.ylabel('Sales', fontsize = 22, fontweight = 'bold')
plt.title('Holidays and Sales', fontsize = 22, fontweight = 'bold')
plt.rcParams['figure.figsize'] = (18,8)
warnings.filterwarnings('ignore')
# Merging the train_set with the stores data set
Train = pd.merge(train_set, stores)
Train
| id | date | store_nbr | family | sales | onpromotion | Sales_date | city | state | type | cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 |
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 |
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 |
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 |
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3000883 | 3000883 | 2017-08-15 | 9 | POULTRY | 438.13 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 |
| 3000884 | 3000884 | 2017-08-15 | 9 | PREPARED FOODS | 154.55 | 1 | 2017-08-15 | Quito | Pichincha | B | 6 |
| 3000885 | 3000885 | 2017-08-15 | 9 | PRODUCE | 2419.73 | 148 | 2017-08-15 | Quito | Pichincha | B | 6 |
| 3000886 | 3000886 | 2017-08-15 | 9 | SCHOOL AND OFFICE SUPPLIES | 121.00 | 8 | 2017-08-15 | Quito | Pichincha | B | 6 |
| 3000887 | 3000887 | 2017-08-15 | 9 | SEAFOOD | 16.00 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 |
3000888 rows × 11 columns
#MErging the new Train set with oil data
Train = pd.merge(Train,Merged_oil_data, on = 'Sales_date')
Train.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3000888 entries, 0 to 3000887 Data columns (total 12 columns): # Column Dtype --- ------ ----- 0 id int64 1 date object 2 store_nbr int64 3 family object 4 sales float64 5 onpromotion int64 6 Sales_date object 7 city object 8 state object 9 type object 10 cluster int64 11 dcoilwtico float64 dtypes: float64(2), int64(4), object(6) memory usage: 297.6+ MB
# checking for completeness of date for holidays data
expected_dates = pd.date_range(start= train_set['Sales_date'].min(), end = train_set['Sales_date'].max())
expected_dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
'2013-01-09', '2013-01-10',
...
'2017-08-06', '2017-08-07', '2017-08-08', '2017-08-09',
'2017-08-10', '2017-08-11', '2017-08-12', '2017-08-13',
'2017-08-14', '2017-08-15'],
dtype='datetime64[ns]', length=1688, freq='D')
# Finding the missing holidays dates
missing_holiday_dates = set(expected_dates.date) - set(holidays["date"].unique())
missing_holiday_dates
{datetime.date(2014, 7, 24),
datetime.date(2016, 9, 27),
datetime.date(2013, 5, 10),
datetime.date(2015, 6, 17),
datetime.date(2015, 10, 22),
datetime.date(2017, 2, 24),
datetime.date(2017, 5, 28),
datetime.date(2015, 6, 18),
datetime.date(2017, 4, 5),
datetime.date(2014, 7, 30),
datetime.date(2017, 5, 15),
datetime.date(2014, 10, 4),
datetime.date(2016, 2, 29),
datetime.date(2015, 2, 11),
datetime.date(2015, 5, 31),
datetime.date(2014, 8, 11),
datetime.date(2017, 4, 14),
datetime.date(2015, 11, 9),
datetime.date(2015, 3, 25),
datetime.date(2016, 4, 5),
datetime.date(2014, 2, 8),
datetime.date(2017, 1, 12),
datetime.date(2013, 8, 2),
datetime.date(2013, 10, 3),
datetime.date(2013, 4, 29),
datetime.date(2016, 4, 14),
datetime.date(2013, 4, 22),
datetime.date(2017, 3, 8),
datetime.date(2014, 5, 13),
datetime.date(2016, 12, 23),
datetime.date(2017, 4, 23),
datetime.date(2013, 5, 13),
datetime.date(2017, 3, 24),
datetime.date(2015, 4, 6),
datetime.date(2015, 2, 2),
datetime.date(2014, 2, 6),
datetime.date(2013, 3, 5),
datetime.date(2013, 6, 8),
datetime.date(2014, 8, 24),
datetime.date(2014, 3, 2),
datetime.date(2017, 5, 30),
datetime.date(2013, 7, 19),
datetime.date(2015, 3, 22),
datetime.date(2016, 12, 21),
datetime.date(2017, 4, 15),
datetime.date(2015, 4, 14),
datetime.date(2017, 2, 19),
datetime.date(2013, 8, 5),
datetime.date(2016, 3, 18),
datetime.date(2014, 5, 17),
datetime.date(2017, 1, 18),
datetime.date(2016, 12, 28),
datetime.date(2017, 5, 9),
datetime.date(2014, 2, 9),
datetime.date(2015, 11, 27),
datetime.date(2014, 11, 16),
datetime.date(2013, 8, 17),
datetime.date(2013, 4, 25),
datetime.date(2017, 5, 2),
datetime.date(2017, 7, 18),
datetime.date(2013, 11, 1),
datetime.date(2016, 8, 9),
datetime.date(2014, 12, 2),
datetime.date(2014, 9, 13),
datetime.date(2016, 5, 9),
datetime.date(2015, 10, 18),
datetime.date(2014, 4, 2),
datetime.date(2016, 12, 30),
datetime.date(2015, 4, 29),
datetime.date(2014, 6, 18),
datetime.date(2016, 1, 13),
datetime.date(2016, 4, 27),
datetime.date(2015, 11, 28),
datetime.date(2013, 12, 27),
datetime.date(2014, 3, 6),
datetime.date(2013, 12, 28),
datetime.date(2013, 5, 5),
datetime.date(2017, 1, 20),
datetime.date(2013, 7, 13),
datetime.date(2014, 4, 5),
datetime.date(2015, 6, 13),
datetime.date(2014, 4, 15),
datetime.date(2014, 2, 19),
datetime.date(2013, 8, 4),
datetime.date(2017, 3, 29),
datetime.date(2014, 5, 25),
datetime.date(2013, 1, 24),
datetime.date(2014, 12, 13),
datetime.date(2016, 9, 28),
datetime.date(2015, 12, 20),
datetime.date(2015, 7, 8),
datetime.date(2014, 5, 8),
datetime.date(2016, 6, 10),
datetime.date(2015, 10, 15),
datetime.date(2015, 12, 24),
datetime.date(2014, 11, 18),
datetime.date(2016, 9, 25),
datetime.date(2014, 4, 16),
datetime.date(2013, 4, 13),
datetime.date(2013, 1, 30),
datetime.date(2014, 9, 27),
datetime.date(2016, 2, 1),
datetime.date(2016, 3, 3),
datetime.date(2015, 5, 21),
datetime.date(2016, 2, 5),
datetime.date(2014, 4, 24),
datetime.date(2016, 12, 9),
datetime.date(2015, 8, 12),
datetime.date(2014, 10, 30),
datetime.date(2014, 6, 25),
datetime.date(2014, 6, 10),
datetime.date(2013, 7, 22),
datetime.date(2015, 12, 21),
datetime.date(2014, 8, 20),
datetime.date(2014, 12, 23),
datetime.date(2015, 6, 8),
datetime.date(2014, 10, 29),
datetime.date(2014, 8, 19),
datetime.date(2014, 12, 30),
datetime.date(2016, 10, 6),
datetime.date(2017, 1, 21),
datetime.date(2015, 12, 11),
datetime.date(2016, 3, 10),
datetime.date(2013, 9, 12),
datetime.date(2015, 8, 1),
datetime.date(2013, 2, 5),
datetime.date(2016, 7, 11),
datetime.date(2014, 4, 20),
datetime.date(2017, 8, 14),
datetime.date(2016, 4, 2),
datetime.date(2014, 11, 19),
datetime.date(2015, 9, 25),
datetime.date(2016, 11, 25),
datetime.date(2015, 12, 3),
datetime.date(2016, 6, 29),
datetime.date(2017, 1, 7),
datetime.date(2013, 4, 3),
datetime.date(2017, 2, 27),
datetime.date(2017, 2, 20),
datetime.date(2017, 3, 3),
datetime.date(2017, 3, 22),
datetime.date(2013, 6, 5),
datetime.date(2017, 4, 7),
datetime.date(2014, 11, 26),
datetime.date(2014, 8, 17),
datetime.date(2015, 9, 24),
datetime.date(2016, 1, 8),
datetime.date(2014, 3, 20),
datetime.date(2015, 10, 6),
datetime.date(2017, 4, 3),
datetime.date(2016, 4, 24),
datetime.date(2014, 1, 5),
datetime.date(2015, 4, 10),
datetime.date(2016, 6, 18),
datetime.date(2015, 6, 21),
datetime.date(2017, 4, 26),
datetime.date(2015, 2, 28),
datetime.date(2013, 3, 2),
datetime.date(2016, 8, 28),
datetime.date(2015, 7, 5),
datetime.date(2017, 8, 15),
datetime.date(2016, 1, 25),
datetime.date(2016, 7, 3),
datetime.date(2014, 1, 28),
datetime.date(2015, 9, 2),
datetime.date(2014, 2, 16),
datetime.date(2017, 1, 8),
datetime.date(2016, 5, 2),
datetime.date(2015, 1, 25),
datetime.date(2015, 7, 28),
datetime.date(2015, 2, 15),
datetime.date(2013, 3, 7),
datetime.date(2014, 5, 20),
datetime.date(2014, 11, 6),
datetime.date(2013, 2, 2),
datetime.date(2014, 6, 17),
datetime.date(2014, 4, 29),
datetime.date(2014, 1, 21),
datetime.date(2014, 10, 11),
datetime.date(2015, 2, 21),
datetime.date(2013, 6, 3),
datetime.date(2014, 10, 23),
datetime.date(2016, 11, 14),
datetime.date(2016, 2, 28),
datetime.date(2016, 2, 20),
datetime.date(2016, 6, 27),
datetime.date(2015, 3, 17),
datetime.date(2014, 12, 12),
datetime.date(2015, 1, 17),
datetime.date(2014, 5, 1),
datetime.date(2015, 10, 5),
datetime.date(2017, 5, 21),
datetime.date(2017, 6, 18),
datetime.date(2015, 3, 4),
datetime.date(2016, 1, 2),
datetime.date(2013, 4, 7),
datetime.date(2014, 1, 12),
datetime.date(2013, 5, 23),
datetime.date(2014, 12, 14),
datetime.date(2014, 9, 23),
datetime.date(2016, 12, 31),
datetime.date(2017, 5, 22),
datetime.date(2015, 9, 17),
datetime.date(2016, 12, 10),
datetime.date(2014, 10, 5),
datetime.date(2013, 11, 24),
datetime.date(2016, 4, 29),
datetime.date(2016, 10, 25),
datetime.date(2014, 6, 7),
datetime.date(2014, 5, 14),
datetime.date(2015, 4, 27),
datetime.date(2015, 1, 24),
datetime.date(2016, 11, 6),
datetime.date(2014, 6, 22),
datetime.date(2015, 3, 3),
datetime.date(2013, 8, 22),
datetime.date(2015, 9, 8),
datetime.date(2016, 11, 7),
datetime.date(2014, 2, 20),
datetime.date(2016, 9, 8),
datetime.date(2013, 3, 28),
datetime.date(2014, 3, 24),
datetime.date(2016, 1, 11),
datetime.date(2013, 11, 8),
datetime.date(2015, 3, 23),
datetime.date(2016, 11, 16),
datetime.date(2014, 8, 28),
datetime.date(2016, 6, 17),
datetime.date(2015, 1, 19),
datetime.date(2014, 5, 9),
datetime.date(2013, 7, 28),
datetime.date(2015, 11, 3),
datetime.date(2017, 2, 17),
datetime.date(2014, 6, 4),
datetime.date(2017, 2, 25),
datetime.date(2016, 5, 29),
datetime.date(2015, 7, 22),
datetime.date(2016, 12, 18),
datetime.date(2014, 11, 28),
datetime.date(2015, 2, 19),
datetime.date(2014, 11, 27),
datetime.date(2014, 5, 27),
datetime.date(2014, 9, 2),
datetime.date(2017, 2, 22),
datetime.date(2014, 5, 11),
datetime.date(2015, 4, 16),
datetime.date(2015, 10, 28),
datetime.date(2014, 4, 10),
datetime.date(2015, 1, 3),
datetime.date(2016, 2, 26),
datetime.date(2017, 1, 31),
datetime.date(2015, 12, 5),
datetime.date(2016, 8, 26),
datetime.date(2014, 3, 10),
datetime.date(2016, 3, 12),
datetime.date(2015, 8, 27),
datetime.date(2014, 8, 23),
datetime.date(2015, 7, 16),
datetime.date(2013, 9, 21),
datetime.date(2013, 6, 14),
datetime.date(2013, 8, 3),
datetime.date(2015, 11, 19),
datetime.date(2015, 9, 5),
datetime.date(2017, 4, 27),
datetime.date(2016, 4, 11),
datetime.date(2014, 6, 5),
datetime.date(2013, 8, 20),
datetime.date(2013, 2, 15),
datetime.date(2015, 5, 10),
datetime.date(2014, 8, 12),
datetime.date(2016, 3, 26),
datetime.date(2013, 5, 31),
datetime.date(2014, 9, 25),
datetime.date(2014, 1, 19),
datetime.date(2017, 4, 29),
datetime.date(2014, 4, 6),
datetime.date(2013, 10, 4),
datetime.date(2014, 9, 1),
datetime.date(2016, 6, 8),
datetime.date(2014, 4, 21),
datetime.date(2013, 6, 15),
datetime.date(2013, 1, 26),
datetime.date(2013, 9, 1),
datetime.date(2016, 9, 30),
datetime.date(2014, 8, 1),
datetime.date(2013, 7, 12),
datetime.date(2014, 11, 24),
datetime.date(2013, 10, 10),
datetime.date(2014, 7, 19),
datetime.date(2013, 7, 21),
datetime.date(2015, 12, 17),
datetime.date(2014, 9, 6),
datetime.date(2015, 3, 14),
datetime.date(2015, 4, 23),
datetime.date(2016, 9, 16),
datetime.date(2013, 5, 4),
datetime.date(2013, 4, 16),
datetime.date(2017, 8, 4),
datetime.date(2013, 5, 21),
datetime.date(2017, 6, 11),
datetime.date(2013, 9, 27),
datetime.date(2014, 5, 12),
datetime.date(2014, 9, 4),
datetime.date(2015, 6, 26),
datetime.date(2017, 6, 4),
datetime.date(2014, 1, 30),
datetime.date(2014, 4, 30),
datetime.date(2015, 8, 4),
datetime.date(2015, 8, 8),
datetime.date(2015, 12, 13),
datetime.date(2016, 7, 22),
datetime.date(2016, 2, 16),
datetime.date(2014, 11, 3),
datetime.date(2015, 12, 12),
datetime.date(2013, 8, 15),
datetime.date(2017, 6, 2),
datetime.date(2014, 9, 19),
datetime.date(2015, 5, 29),
datetime.date(2016, 5, 5),
datetime.date(2017, 6, 23),
datetime.date(2017, 8, 1),
datetime.date(2015, 10, 9),
datetime.date(2016, 10, 11),
datetime.date(2013, 2, 3),
datetime.date(2017, 2, 26),
datetime.date(2016, 10, 22),
datetime.date(2017, 2, 15),
datetime.date(2015, 5, 18),
datetime.date(2013, 3, 21),
datetime.date(2014, 6, 23),
datetime.date(2015, 8, 10),
datetime.date(2016, 6, 25),
datetime.date(2014, 9, 9),
datetime.date(2013, 9, 17),
datetime.date(2016, 2, 7),
datetime.date(2014, 6, 8),
datetime.date(2013, 11, 18),
datetime.date(2017, 6, 26),
datetime.date(2017, 1, 9),
datetime.date(2013, 4, 23),
datetime.date(2015, 3, 21),
datetime.date(2015, 8, 23),
datetime.date(2017, 2, 12),
datetime.date(2014, 4, 14),
datetime.date(2015, 8, 21),
datetime.date(2014, 3, 27),
datetime.date(2015, 7, 29),
datetime.date(2016, 3, 14),
datetime.date(2017, 3, 11),
datetime.date(2015, 5, 27),
datetime.date(2016, 7, 17),
datetime.date(2014, 6, 12),
datetime.date(2016, 12, 29),
datetime.date(2015, 3, 26),
datetime.date(2014, 4, 11),
datetime.date(2014, 3, 25),
datetime.date(2015, 12, 10),
datetime.date(2015, 2, 12),
datetime.date(2015, 5, 15),
datetime.date(2014, 4, 4),
datetime.date(2016, 5, 25),
datetime.date(2016, 7, 4),
datetime.date(2015, 4, 5),
datetime.date(2013, 2, 1),
datetime.date(2014, 7, 10),
datetime.date(2013, 8, 8),
datetime.date(2015, 12, 29),
datetime.date(2013, 11, 10),
datetime.date(2014, 8, 9),
datetime.date(2016, 5, 27),
datetime.date(2015, 10, 14),
datetime.date(2014, 8, 27),
datetime.date(2016, 9, 2),
datetime.date(2014, 2, 18),
datetime.date(2013, 1, 14),
datetime.date(2013, 12, 12),
datetime.date(2014, 4, 3),
datetime.date(2013, 4, 6),
datetime.date(2016, 4, 18),
datetime.date(2017, 4, 4),
datetime.date(2015, 6, 29),
datetime.date(2014, 1, 7),
datetime.date(2017, 5, 3),
datetime.date(2013, 5, 30),
datetime.date(2015, 1, 22),
datetime.date(2013, 1, 12),
datetime.date(2015, 8, 19),
datetime.date(2017, 3, 14),
datetime.date(2016, 5, 1),
datetime.date(2014, 11, 23),
datetime.date(2016, 5, 13),
datetime.date(2016, 3, 1),
datetime.date(2015, 10, 3),
datetime.date(2016, 8, 25),
datetime.date(2017, 3, 26),
datetime.date(2013, 1, 28),
datetime.date(2013, 4, 10),
datetime.date(2017, 1, 16),
datetime.date(2016, 10, 28),
datetime.date(2014, 10, 7),
datetime.date(2014, 4, 25),
datetime.date(2014, 11, 7),
datetime.date(2014, 6, 30),
datetime.date(2016, 11, 10),
datetime.date(2014, 10, 1),
datetime.date(2013, 12, 24),
datetime.date(2013, 5, 7),
datetime.date(2016, 12, 4),
datetime.date(2015, 10, 17),
datetime.date(2016, 3, 25),
datetime.date(2013, 3, 12),
datetime.date(2013, 3, 11),
datetime.date(2015, 9, 7),
datetime.date(2014, 10, 25),
datetime.date(2013, 10, 7),
datetime.date(2017, 6, 10),
datetime.date(2014, 2, 21),
datetime.date(2015, 7, 18),
datetime.date(2013, 11, 6),
datetime.date(2013, 2, 10),
datetime.date(2014, 12, 6),
datetime.date(2016, 9, 10),
datetime.date(2017, 7, 14),
datetime.date(2014, 3, 11),
datetime.date(2016, 1, 3),
datetime.date(2014, 7, 4),
datetime.date(2014, 12, 3),
datetime.date(2017, 6, 6),
datetime.date(2014, 7, 21),
datetime.date(2015, 3, 24),
datetime.date(2016, 6, 24),
datetime.date(2017, 5, 6),
datetime.date(2016, 10, 18),
datetime.date(2013, 6, 19),
datetime.date(2016, 7, 28),
datetime.date(2014, 8, 30),
datetime.date(2014, 1, 10),
datetime.date(2013, 6, 21),
datetime.date(2013, 2, 17),
datetime.date(2016, 10, 3),
datetime.date(2013, 10, 21),
datetime.date(2017, 5, 20),
datetime.date(2016, 11, 2),
datetime.date(2017, 2, 2),
datetime.date(2015, 3, 10),
datetime.date(2015, 5, 28),
datetime.date(2013, 9, 10),
datetime.date(2013, 7, 15),
datetime.date(2014, 8, 31),
datetime.date(2017, 3, 6),
datetime.date(2016, 11, 18),
datetime.date(2013, 5, 3),
datetime.date(2014, 11, 1),
datetime.date(2015, 12, 7),
datetime.date(2013, 9, 6),
datetime.date(2015, 9, 6),
datetime.date(2016, 7, 23),
datetime.date(2017, 1, 30),
datetime.date(2015, 6, 2),
datetime.date(2013, 2, 14),
datetime.date(2013, 7, 27),
datetime.date(2014, 12, 8),
datetime.date(2016, 12, 20),
datetime.date(2017, 7, 21),
datetime.date(2015, 4, 17),
datetime.date(2016, 7, 20),
datetime.date(2014, 8, 15),
datetime.date(2014, 9, 22),
datetime.date(2013, 2, 13),
datetime.date(2014, 3, 29),
datetime.date(2014, 7, 31),
datetime.date(2016, 12, 16),
datetime.date(2016, 11, 17),
datetime.date(2017, 4, 17),
datetime.date(2016, 5, 4),
datetime.date(2013, 11, 29),
datetime.date(2016, 8, 12),
datetime.date(2013, 7, 29),
datetime.date(2015, 1, 20),
datetime.date(2013, 10, 18),
datetime.date(2014, 3, 16),
datetime.date(2015, 9, 15),
datetime.date(2016, 2, 24),
datetime.date(2013, 10, 9),
datetime.date(2014, 4, 1),
datetime.date(2013, 2, 27),
datetime.date(2013, 1, 13),
datetime.date(2014, 5, 2),
datetime.date(2014, 3, 5),
datetime.date(2017, 8, 12),
datetime.date(2016, 7, 6),
datetime.date(2014, 5, 29),
datetime.date(2014, 10, 6),
datetime.date(2015, 6, 19),
datetime.date(2014, 12, 20),
datetime.date(2015, 5, 22),
datetime.date(2017, 2, 11),
datetime.date(2016, 6, 15),
datetime.date(2016, 8, 19),
datetime.date(2017, 7, 17),
datetime.date(2016, 6, 28),
datetime.date(2017, 8, 7),
datetime.date(2013, 10, 29),
datetime.date(2015, 5, 26),
datetime.date(2013, 4, 20),
datetime.date(2014, 11, 4),
datetime.date(2013, 12, 23),
datetime.date(2016, 8, 1),
datetime.date(2015, 9, 27),
datetime.date(2016, 3, 24),
datetime.date(2015, 12, 22),
datetime.date(2015, 5, 11),
datetime.date(2013, 4, 28),
datetime.date(2014, 2, 12),
datetime.date(2016, 12, 17),
datetime.date(2013, 3, 15),
datetime.date(2015, 7, 25),
datetime.date(2013, 3, 10),
datetime.date(2013, 1, 17),
datetime.date(2013, 2, 21),
datetime.date(2015, 3, 20),
datetime.date(2014, 10, 8),
datetime.date(2016, 11, 13),
datetime.date(2013, 4, 9),
datetime.date(2017, 5, 14),
datetime.date(2016, 6, 20),
datetime.date(2014, 10, 15),
datetime.date(2014, 12, 7),
datetime.date(2016, 9, 5),
datetime.date(2017, 8, 8),
datetime.date(2017, 4, 24),
datetime.date(2013, 1, 22),
datetime.date(2016, 11, 20),
datetime.date(2017, 7, 19),
datetime.date(2016, 10, 13),
datetime.date(2014, 2, 25),
datetime.date(2013, 7, 11),
datetime.date(2016, 6, 14),
datetime.date(2015, 1, 4),
datetime.date(2015, 11, 14),
datetime.date(2015, 4, 11),
datetime.date(2013, 12, 30),
datetime.date(2016, 3, 7),
datetime.date(2013, 11, 16),
datetime.date(2016, 5, 23),
datetime.date(2013, 9, 8),
datetime.date(2015, 10, 1),
datetime.date(2014, 1, 23),
datetime.date(2016, 3, 21),
datetime.date(2016, 4, 10),
datetime.date(2013, 3, 4),
datetime.date(2017, 5, 4),
datetime.date(2014, 2, 7),
datetime.date(2014, 11, 21),
datetime.date(2017, 4, 12),
datetime.date(2015, 2, 18),
datetime.date(2015, 3, 16),
datetime.date(2014, 1, 29),
datetime.date(2014, 5, 21),
datetime.date(2014, 8, 8),
datetime.date(2013, 10, 26),
datetime.date(2016, 5, 14),
datetime.date(2014, 9, 21),
datetime.date(2016, 8, 17),
datetime.date(2015, 4, 3),
datetime.date(2015, 9, 13),
datetime.date(2016, 4, 21),
datetime.date(2013, 6, 30),
datetime.date(2014, 12, 18),
datetime.date(2016, 9, 24),
datetime.date(2014, 2, 22),
datetime.date(2014, 3, 3),
datetime.date(2013, 10, 11),
datetime.date(2016, 2, 15),
datetime.date(2014, 4, 9),
datetime.date(2016, 8, 18),
datetime.date(2013, 7, 10),
datetime.date(2013, 3, 30),
datetime.date(2013, 5, 8),
datetime.date(2015, 11, 2),
datetime.date(2014, 8, 25),
datetime.date(2013, 6, 20),
datetime.date(2015, 12, 1),
datetime.date(2015, 9, 28),
datetime.date(2015, 5, 13),
datetime.date(2016, 6, 4),
datetime.date(2013, 11, 27),
datetime.date(2017, 8, 6),
datetime.date(2015, 12, 14),
datetime.date(2015, 11, 11),
datetime.date(2016, 9, 11),
datetime.date(2017, 1, 28),
datetime.date(2014, 1, 18),
datetime.date(2013, 9, 30),
datetime.date(2015, 2, 3),
datetime.date(2013, 7, 26),
datetime.date(2015, 5, 25),
datetime.date(2017, 7, 2),
datetime.date(2014, 2, 11),
datetime.date(2014, 7, 9),
datetime.date(2015, 10, 30),
datetime.date(2015, 11, 12),
datetime.date(2017, 2, 8),
datetime.date(2015, 4, 19),
datetime.date(2013, 11, 20),
datetime.date(2016, 6, 23),
datetime.date(2015, 11, 21),
datetime.date(2014, 10, 19),
datetime.date(2017, 7, 15),
datetime.date(2017, 3, 4),
datetime.date(2016, 1, 4),
datetime.date(2013, 5, 18),
datetime.date(2015, 4, 30),
datetime.date(2016, 12, 1),
datetime.date(2015, 4, 20),
datetime.date(2016, 1, 1),
datetime.date(2014, 6, 27),
datetime.date(2014, 8, 16),
datetime.date(2017, 5, 27),
datetime.date(2016, 2, 3),
datetime.date(2014, 1, 26),
datetime.date(2013, 3, 3),
datetime.date(2014, 2, 10),
datetime.date(2015, 11, 10),
datetime.date(2013, 9, 11),
datetime.date(2014, 5, 6),
datetime.date(2014, 10, 9),
datetime.date(2013, 2, 16),
datetime.date(2013, 12, 7),
datetime.date(2014, 11, 13),
datetime.date(2013, 2, 25),
datetime.date(2017, 7, 30),
datetime.date(2014, 9, 10),
datetime.date(2015, 5, 1),
datetime.date(2016, 5, 16),
datetime.date(2015, 6, 1),
datetime.date(2013, 11, 9),
datetime.date(2017, 1, 1),
datetime.date(2014, 11, 14),
datetime.date(2014, 4, 22),
datetime.date(2015, 8, 16),
datetime.date(2016, 1, 6),
datetime.date(2013, 12, 15),
datetime.date(2013, 3, 19),
datetime.date(2015, 12, 15),
datetime.date(2015, 8, 9),
datetime.date(2013, 7, 24),
datetime.date(2015, 7, 2),
datetime.date(2015, 7, 20),
datetime.date(2013, 8, 6),
datetime.date(2016, 5, 6),
datetime.date(2014, 4, 18),
datetime.date(2013, 4, 5),
datetime.date(2013, 9, 18),
datetime.date(2017, 4, 22),
datetime.date(2014, 9, 26),
datetime.date(2014, 9, 15),
datetime.date(2014, 11, 30),
datetime.date(2013, 5, 15),
datetime.date(2014, 2, 13),
datetime.date(2016, 3, 6),
datetime.date(2016, 9, 9),
datetime.date(2016, 6, 5),
datetime.date(2017, 5, 1),
datetime.date(2013, 3, 24),
datetime.date(2015, 4, 1),
datetime.date(2016, 9, 1),
datetime.date(2015, 7, 30),
datetime.date(2016, 3, 30),
datetime.date(2015, 6, 30),
datetime.date(2014, 5, 24),
datetime.date(2014, 10, 21),
datetime.date(2015, 7, 21),
datetime.date(2013, 6, 13),
datetime.date(2014, 8, 22),
datetime.date(2014, 9, 5),
datetime.date(2015, 9, 1),
datetime.date(2016, 8, 23),
datetime.date(2017, 4, 10),
datetime.date(2014, 10, 14),
datetime.date(2013, 11, 11),
datetime.date(2016, 11, 9),
datetime.date(2016, 8, 20),
datetime.date(2015, 2, 5),
datetime.date(2016, 4, 15),
datetime.date(2013, 1, 18),
datetime.date(2014, 9, 11),
datetime.date(2017, 1, 27),
datetime.date(2015, 7, 13),
datetime.date(2013, 6, 27),
datetime.date(2015, 3, 1),
datetime.date(2015, 3, 11),
datetime.date(2015, 7, 17),
datetime.date(2013, 5, 11),
datetime.date(2015, 6, 23),
datetime.date(2014, 12, 24),
datetime.date(2013, 2, 4),
datetime.date(2014, 7, 20),
datetime.date(2013, 9, 15),
datetime.date(2013, 10, 5),
datetime.date(2013, 12, 25),
datetime.date(2015, 5, 23),
datetime.date(2016, 4, 17),
datetime.date(2017, 5, 5),
datetime.date(2015, 10, 26),
datetime.date(2014, 7, 13),
datetime.date(2016, 5, 19),
datetime.date(2013, 10, 2),
datetime.date(2015, 1, 2),
datetime.date(2013, 2, 24),
datetime.date(2013, 3, 26),
datetime.date(2015, 1, 8),
datetime.date(2015, 1, 18),
datetime.date(2015, 11, 15),
datetime.date(2016, 10, 8),
datetime.date(2017, 7, 6),
datetime.date(2016, 11, 5),
datetime.date(2017, 5, 13),
datetime.date(2015, 8, 28),
datetime.date(2014, 3, 28),
datetime.date(2013, 8, 10),
datetime.date(2013, 6, 12),
datetime.date(2015, 6, 16),
datetime.date(2017, 1, 15),
datetime.date(2014, 4, 7),
datetime.date(2014, 7, 23),
datetime.date(2014, 9, 30),
datetime.date(2017, 2, 28),
datetime.date(2014, 1, 9),
datetime.date(2017, 2, 1),
datetime.date(2016, 8, 3),
datetime.date(2015, 10, 19),
datetime.date(2013, 8, 31),
datetime.date(2015, 4, 24),
datetime.date(2016, 7, 15),
datetime.date(2016, 2, 21),
datetime.date(2013, 9, 29),
datetime.date(2014, 6, 3),
datetime.date(2016, 4, 22),
datetime.date(2016, 11, 21),
datetime.date(2016, 2, 6),
datetime.date(2017, 2, 16),
datetime.date(2017, 5, 18),
datetime.date(2017, 1, 2),
datetime.date(2017, 7, 29),
datetime.date(2017, 6, 12),
datetime.date(2015, 7, 4),
datetime.date(2015, 4, 12),
datetime.date(2014, 7, 11),
datetime.date(2015, 12, 25),
datetime.date(2015, 7, 6),
datetime.date(2014, 2, 17),
datetime.date(2016, 9, 18),
datetime.date(2013, 11, 14),
datetime.date(2015, 4, 22),
datetime.date(2017, 6, 21),
datetime.date(2015, 10, 7),
datetime.date(2013, 8, 14),
datetime.date(2013, 10, 6),
datetime.date(2016, 8, 6),
datetime.date(2016, 4, 8),
datetime.date(2013, 5, 22),
datetime.date(2014, 11, 8),
datetime.date(2016, 8, 24),
datetime.date(2014, 1, 22),
datetime.date(2016, 11, 19),
datetime.date(2014, 7, 25),
datetime.date(2015, 10, 24),
datetime.date(2016, 4, 1),
datetime.date(2017, 3, 9),
datetime.date(2013, 1, 9),
datetime.date(2015, 4, 9),
datetime.date(2016, 2, 22),
datetime.date(2016, 6, 11),
datetime.date(2016, 11, 12),
datetime.date(2013, 6, 28),
datetime.date(2017, 8, 3),
datetime.date(2013, 2, 26),
datetime.date(2013, 10, 8),
datetime.date(2015, 10, 11),
datetime.date(2013, 11, 28),
datetime.date(2015, 11, 4),
datetime.date(2015, 10, 10),
datetime.date(2013, 8, 9),
datetime.date(2015, 1, 30),
datetime.date(2015, 8, 22),
datetime.date(2013, 6, 26),
datetime.date(2016, 4, 19),
datetime.date(2013, 11, 3),
datetime.date(2016, 5, 28),
datetime.date(2015, 6, 27),
datetime.date(2016, 9, 4),
datetime.date(2016, 7, 19),
datetime.date(2013, 1, 20),
datetime.date(2016, 3, 11),
datetime.date(2016, 6, 16),
datetime.date(2017, 3, 2),
datetime.date(2015, 6, 11),
datetime.date(2013, 10, 19),
datetime.date(2014, 1, 3),
datetime.date(2017, 5, 31),
datetime.date(2014, 7, 29),
datetime.date(2013, 9, 2),
datetime.date(2015, 1, 1),
datetime.date(2014, 10, 20),
datetime.date(2014, 8, 14),
datetime.date(2013, 1, 2),
datetime.date(2016, 7, 7),
datetime.date(2013, 4, 24),
datetime.date(2017, 7, 4),
datetime.date(2016, 12, 15),
datetime.date(2015, 6, 3),
datetime.date(2014, 3, 21),
datetime.date(2014, 12, 17),
datetime.date(2015, 6, 5),
datetime.date(2013, 10, 27),
datetime.date(2013, 4, 8),
datetime.date(2016, 5, 31),
datetime.date(2016, 2, 19),
datetime.date(2016, 2, 13),
datetime.date(2015, 7, 7),
datetime.date(2014, 2, 3),
datetime.date(2016, 3, 2),
datetime.date(2016, 10, 16),
datetime.date(2016, 3, 5),
datetime.date(2015, 2, 23),
datetime.date(2013, 6, 6),
datetime.date(2015, 7, 14),
datetime.date(2014, 10, 12),
datetime.date(2017, 6, 22),
datetime.date(2015, 11, 30),
datetime.date(2014, 7, 8),
datetime.date(2016, 1, 30),
datetime.date(2014, 8, 7),
datetime.date(2014, 10, 31),
datetime.date(2013, 6, 4),
datetime.date(2016, 7, 29),
datetime.date(2016, 1, 10),
datetime.date(2017, 2, 9),
datetime.date(2015, 2, 9),
datetime.date(2016, 7, 16),
datetime.date(2017, 6, 17),
datetime.date(2016, 12, 6),
datetime.date(2015, 11, 7),
datetime.date(2013, 12, 21),
datetime.date(2014, 5, 28),
datetime.date(2016, 9, 20),
datetime.date(2014, 10, 17),
datetime.date(2016, 1, 23),
datetime.date(2015, 10, 27),
datetime.date(2013, 11, 30),
datetime.date(2014, 2, 27),
datetime.date(2017, 4, 16),
datetime.date(2014, 1, 6),
datetime.date(2015, 9, 18),
datetime.date(2016, 7, 13),
datetime.date(2016, 3, 23),
datetime.date(2016, 10, 27),
datetime.date(2015, 4, 26),
datetime.date(2015, 8, 24),
datetime.date(2016, 1, 20),
datetime.date(2013, 12, 31),
datetime.date(2017, 1, 24),
datetime.date(2014, 12, 19),
datetime.date(2015, 2, 4),
datetime.date(2015, 3, 6),
datetime.date(2014, 9, 28),
datetime.date(2016, 10, 19),
datetime.date(2013, 10, 20),
datetime.date(2016, 11, 29),
datetime.date(2013, 4, 27),
datetime.date(2014, 12, 4),
datetime.date(2015, 9, 4),
datetime.date(2016, 3, 15),
datetime.date(2016, 9, 12),
datetime.date(2017, 3, 27),
datetime.date(2015, 10, 20),
datetime.date(2016, 8, 30),
datetime.date(2016, 9, 13),
datetime.date(2017, 4, 6),
datetime.date(2015, 10, 21),
datetime.date(2014, 10, 22),
datetime.date(2015, 6, 24),
datetime.date(2013, 3, 25),
datetime.date(2017, 7, 9),
datetime.date(2014, 3, 30),
datetime.date(2013, 3, 23),
datetime.date(2015, 2, 27),
datetime.date(2016, 10, 20),
datetime.date(2014, 4, 26),
datetime.date(2017, 7, 27),
datetime.date(2013, 7, 8),
datetime.date(2016, 3, 27),
datetime.date(2014, 7, 26),
datetime.date(2013, 12, 8),
datetime.date(2017, 1, 3),
datetime.date(2017, 3, 25),
datetime.date(2016, 4, 25),
datetime.date(2015, 5, 30),
datetime.date(2017, 8, 13),
datetime.date(2015, 7, 26),
datetime.date(2013, 12, 4),
datetime.date(2016, 6, 9),
datetime.date(2015, 6, 22),
datetime.date(2015, 8, 6),
datetime.date(2017, 3, 18),
datetime.date(2016, 8, 4),
datetime.date(2017, 1, 5),
datetime.date(2016, 5, 30),
datetime.date(2013, 1, 31),
datetime.date(2016, 3, 16),
datetime.date(2016, 9, 21),
datetime.date(2014, 6, 13),
datetime.date(2014, 9, 14),
datetime.date(2014, 8, 5),
datetime.date(2016, 1, 21),
datetime.date(2015, 1, 11),
datetime.date(2017, 6, 9),
datetime.date(2014, 6, 16),
datetime.date(2016, 8, 14),
datetime.date(2013, 12, 16),
datetime.date(2015, 8, 3),
datetime.date(2016, 6, 13),
datetime.date(2016, 5, 15),
datetime.date(2015, 9, 22),
datetime.date(2015, 12, 23),
datetime.date(2014, 7, 2),
datetime.date(2016, 12, 22),
datetime.date(2016, 4, 23),
datetime.date(2017, 7, 20),
datetime.date(2015, 3, 9),
datetime.date(2016, 7, 30),
datetime.date(2014, 7, 1),
datetime.date(2013, 7, 3),
datetime.date(2016, 12, 11),
datetime.date(2015, 11, 13),
datetime.date(2015, 10, 4),
datetime.date(2013, 8, 11),
datetime.date(2014, 9, 3),
datetime.date(2013, 11, 4),
datetime.date(2013, 4, 2),
datetime.date(2017, 1, 14),
datetime.date(2016, 6, 19),
datetime.date(2016, 10, 15),
datetime.date(2013, 6, 9),
datetime.date(2013, 8, 23),
datetime.date(2015, 4, 25),
datetime.date(2015, 4, 2),
datetime.date(2015, 12, 2),
datetime.date(2016, 8, 10),
datetime.date(2016, 4, 9),
datetime.date(2014, 12, 21),
datetime.date(2016, 11, 22),
datetime.date(2014, 12, 27),
datetime.date(2017, 3, 13),
datetime.date(2013, 10, 30),
datetime.date(2015, 6, 6),
datetime.date(2013, 1, 25),
datetime.date(2013, 12, 9),
datetime.date(2014, 11, 2),
datetime.date(2015, 9, 16),
datetime.date(2014, 5, 31),
datetime.date(2015, 1, 13),
datetime.date(2013, 12, 13),
datetime.date(2013, 9, 22),
datetime.date(2013, 11, 21),
datetime.date(2014, 6, 24),
datetime.date(2015, 3, 2),
datetime.date(2017, 5, 10),
datetime.date(2016, 9, 14),
datetime.date(2014, 5, 18),
datetime.date(2015, 2, 22),
datetime.date(2017, 5, 7),
datetime.date(2017, 5, 19),
datetime.date(2017, 6, 30),
datetime.date(2016, 11, 27),
datetime.date(2015, 1, 10),
datetime.date(2016, 8, 8),
datetime.date(2015, 1, 29),
datetime.date(2014, 7, 5),
datetime.date(2016, 4, 3),
datetime.date(2015, 10, 8),
datetime.date(2016, 3, 28),
datetime.date(2016, 2, 12),
datetime.date(2016, 5, 3),
datetime.date(2013, 3, 6),
datetime.date(2014, 8, 6),
datetime.date(2015, 7, 19),
datetime.date(2016, 11, 30),
datetime.date(2014, 5, 4),
datetime.date(2013, 2, 28),
datetime.date(2017, 3, 1),
datetime.date(2015, 7, 11),
datetime.date(2014, 4, 19),
datetime.date(2017, 3, 15),
datetime.date(2016, 11, 15),
datetime.date(2017, 1, 4),
datetime.date(2013, 4, 1),
datetime.date(2013, 10, 15),
datetime.date(2013, 1, 27),
...}
# adding the missing holidays date
holidays_add = pd.DataFrame(missing_holiday_dates, columns = ["date"])
holidays_add
| date | |
|---|---|
| 0 | 2014-07-24 |
| 1 | 2016-09-27 |
| 2 | 2013-05-10 |
| 3 | 2015-06-17 |
| 4 | 2015-10-22 |
| ... | ... |
| 1683 | 2016-12-26 |
| 1684 | 2015-02-24 |
| 1685 | 2013-01-07 |
| 1686 | 2013-11-17 |
| 1687 | 2014-02-24 |
1688 rows × 1 columns
# renaming columns
holidays_add.rename(columns = {"date":"Sales_date"}, inplace = True)
holidays_add
| Sales_date | |
|---|---|
| 0 | 2014-07-24 |
| 1 | 2016-09-27 |
| 2 | 2013-05-10 |
| 3 | 2015-06-17 |
| 4 | 2015-10-22 |
| ... | ... |
| 1683 | 2016-12-26 |
| 1684 | 2015-02-24 |
| 1685 | 2013-01-07 |
| 1686 | 2013-11-17 |
| 1687 | 2014-02-24 |
1688 rows × 1 columns
#dropping columns
holidays.drop(columns = ["date",'description'], inplace = True)
holidays.head()
| type | locale | locale_name | transferred | Sales_date | |
|---|---|---|---|---|---|
| 0 | Holiday | Local | Manta | False | 2012-03-02 |
| 1 | Holiday | Regional | Cotopaxi | False | 2012-04-01 |
| 2 | Holiday | Local | Cuenca | False | 2012-04-12 |
| 3 | Holiday | Local | Libertad | False | 2012-04-14 |
| 4 | Holiday | Local | Riobamba | False | 2012-04-21 |
holidays.rename(columns = {"Sales_date":"sales_date"}, inplace = True)
holidays
| type | locale | locale_name | transferred | sales_date | |
|---|---|---|---|---|---|
| 0 | Holiday | Local | Manta | False | 2012-03-02 |
| 1 | Holiday | Regional | Cotopaxi | False | 2012-04-01 |
| 2 | Holiday | Local | Cuenca | False | 2012-04-12 |
| 3 | Holiday | Local | Libertad | False | 2012-04-14 |
| 4 | Holiday | Local | Riobamba | False | 2012-04-21 |
| ... | ... | ... | ... | ... | ... |
| 345 | Additional | National | Ecuador | False | 2017-12-22 |
| 346 | Additional | National | Ecuador | False | 2017-12-23 |
| 347 | Additional | National | Ecuador | False | 2017-12-24 |
| 348 | Holiday | National | Ecuador | False | 2017-12-25 |
| 349 | Additional | National | Ecuador | False | 2017-12-26 |
350 rows × 5 columns
# changing holidays date to datetime
holidays['Sales_date'] = pd.to_datetime(holidays['sales_date']).dt.date
# merging data
holidays = holidays_add.merge(holidays ,how='left', on='Sales_date')
#holidays.head()
#holidays.isnull().sum()
# merging the Train data with complete holidays dataset
Train = pd.merge(Train, holidays, on = "Sales_date")
Train
| id | date | store_nbr | family | sales | onpromotion | Sales_date | city | state | type_x | cluster | dcoilwtico | type_y | locale | locale_name | transferred | sales_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3054343 | 3000883 | 2017-08-15 | 9 | POULTRY | 438.13 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054344 | 3000884 | 2017-08-15 | 9 | PREPARED FOODS | 154.55 | 1 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054345 | 3000885 | 2017-08-15 | 9 | PRODUCE | 2419.73 | 148 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054346 | 3000886 | 2017-08-15 | 9 | SCHOOL AND OFFICE SUPPLIES | 121.00 | 8 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054347 | 3000887 | 2017-08-15 | 9 | SEAFOOD | 16.00 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
3054348 rows × 17 columns
# Checking for nll values
Train.isnull().sum()
id 0 date 0 store_nbr 0 family 0 sales 0 onpromotion 0 Sales_date 0 city 0 state 0 type_x 0 cluster 0 dcoilwtico 0 type_y 2551824 locale 2551824 locale_name 2551824 transferred 2551824 sales_date 2551824 dtype: int64
# Filling the nulls in the holiday data
Train["type_y"] = Train["type_y"].fillna("Work Day")
Train["locale"] = Train["locale"].fillna("National")
Train["transferred"] = Train["transferred"].fillna(False)
Train.isnull().sum()
id 0 date 0 store_nbr 0 family 0 sales 0 onpromotion 0 Sales_date 0 city 0 state 0 type_x 0 cluster 0 dcoilwtico 0 type_y 0 locale 0 locale_name 2551824 transferred 0 sales_date 2551824 dtype: int64
Train
| id | date | store_nbr | family | sales | onpromotion | Sales_date | city | state | type_x | cluster | dcoilwtico | type_y | locale | locale_name | transferred | sales_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.00 | 0 | 2013-01-01 | Quito | Pichincha | D | 13 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3054343 | 3000883 | 2017-08-15 | 9 | POULTRY | 438.13 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054344 | 3000884 | 2017-08-15 | 9 | PREPARED FOODS | 154.55 | 1 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054345 | 3000885 | 2017-08-15 | 9 | PRODUCE | 2419.73 | 148 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054346 | 3000886 | 2017-08-15 | 9 | SCHOOL AND OFFICE SUPPLIES | 121.00 | 8 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
| 3054347 | 3000887 | 2017-08-15 | 9 | SEAFOOD | 16.00 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 |
3054348 rows × 17 columns
# chamging the dates column in transactions dataset to datetime.
transactions['Sales_date'] = pd.to_datetime(transactions['date']).dt.date
# merging Train data with transactions data
Train = pd.merge(Train, transactions, on = ["Sales_date", "store_nbr"])
Train.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2805231 entries, 0 to 2805230 Data columns (total 19 columns): # Column Dtype --- ------ ----- 0 id int64 1 date_x object 2 store_nbr int64 3 family object 4 sales float64 5 onpromotion int64 6 Sales_date object 7 city object 8 state object 9 type_x object 10 cluster int64 11 dcoilwtico float64 12 type_y object 13 locale object 14 locale_name object 15 transferred bool 16 sales_date object 17 date_y object 18 transactions int64 dtypes: bool(1), float64(2), int64(5), object(11) memory usage: 409.3+ MB
Train
| id | date_x | store_nbr | family | sales | onpromotion | Sales_date | city | state | type_x | cluster | dcoilwtico | type_y | locale | locale_name | transferred | sales_date | date_y | transactions | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 561 | 2013-01-01 | 25 | AUTOMOTIVE | 0.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 |
| 1 | 562 | 2013-01-01 | 25 | BABY CARE | 0.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 |
| 2 | 563 | 2013-01-01 | 25 | BEAUTY | 2.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 |
| 3 | 564 | 2013-01-01 | 25 | BEVERAGES | 810.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 |
| 4 | 565 | 2013-01-01 | 25 | BOOKS | 0.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2805226 | 3000883 | 2017-08-15 | 9 | POULTRY | 438.13 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 | 2017-08-15 | 2155 |
| 2805227 | 3000884 | 2017-08-15 | 9 | PREPARED FOODS | 154.55 | 1 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 | 2017-08-15 | 2155 |
| 2805228 | 3000885 | 2017-08-15 | 9 | PRODUCE | 2419.73 | 148 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 | 2017-08-15 | 2155 |
| 2805229 | 3000886 | 2017-08-15 | 9 | SCHOOL AND OFFICE SUPPLIES | 121.00 | 8 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 | 2017-08-15 | 2155 |
| 2805230 | 3000887 | 2017-08-15 | 9 | SEAFOOD | 16.00 | 0 | 2017-08-15 | Quito | Pichincha | B | 6 | 47.57 | Holiday | Local | Riobamba | False | 2017-08-15 | 2017-08-15 | 2155 |
2805231 rows × 19 columns
def getSeason(row):
if row in (3,4,5):
return 'Spring'
elif row in (6,7,8):
return 'Summer'
elif row in (9,10,11):
return 'Fall'
elif row in (12,1,2):
return 'Winter'
def getDateFeatures(df, date):
df['date'] = pd.to_datetime(df[date])
df['month'] = df['date'].dt.month
df['day_of_month'] = df['date'].dt.day
df['day_of_year'] = df['date'].dt.dayofyear
df['week_of_year'] = df['date'].dt.isocalendar().week
df['day_of_week'] = df['date'].dt.dayofweek
df['year'] = df['date'].dt.year
df['is_weekend'] = np.where(df['day_of_week'] > 4, 1, 0)
df['is_month_start'] = df['date'].dt.is_month_start.astype(int)
df['is_month_end'] = df['date'].dt.is_month_end.astype(int)
df['quarter'] = df['date'].dt.quarter
df['is_quarter_start'] = df['date'].dt.is_quarter_start.astype(int)
df['is_quarter_end'] = df['date'].dt.is_quarter_end.astype(int)
df['is_year_start'] = df['date'].dt.is_year_end.astype(int)
df['is_year_end'] = df['date'].dt.is_year_end.astype(int)
df['season'] = df['month'].apply(getSeason)
return df
# Getting date features for train data set
Train = getDateFeatures(Train, 'Sales_date')
pd.set_option('display.max_columns', None)
Train.head()
| id | date_x | store_nbr | family | sales | onpromotion | Sales_date | city | state | type_x | cluster | dcoilwtico | type_y | locale | locale_name | transferred | sales_date | date_y | transactions | date | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 561 | 2013-01-01 | 25 | AUTOMOTIVE | 0.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 1 | 562 | 2013-01-01 | 25 | BABY CARE | 0.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 2 | 563 | 2013-01-01 | 25 | BEAUTY | 2.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 3 | 564 | 2013-01-01 | 25 | BEVERAGES | 810.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 4 | 565 | 2013-01-01 | 25 | BOOKS | 0.00 | 0 | 2013-01-01 | Salinas | Santa Elena | D | 1 | 93.14 | Holiday | National | Ecuador | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
from sklearn.preprocessing import LabelEncoder
# applying label encoding to the following columns
le = LabelEncoder()
Train['family'] = le.fit_transform(Train['family'])
Train['locale_name'] = le.fit_transform(Train['locale_name'])
Train['state'] = le.fit_transform(Train['state'])
Train['locale'] = le.fit_transform(Train['locale'])
Train
| id | date_x | store_nbr | family | sales | onpromotion | Sales_date | city | state | type_x | cluster | dcoilwtico | type_y | locale | locale_name | transferred | sales_date | date_y | transactions | date | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 561 | 2013-01-01 | 25 | 0 | 0.00 | 0 | 2013-01-01 | Salinas | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 1 | 562 | 2013-01-01 | 25 | 1 | 0.00 | 0 | 2013-01-01 | Salinas | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 2 | 563 | 2013-01-01 | 25 | 2 | 2.00 | 0 | 2013-01-01 | Salinas | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 3 | 564 | 2013-01-01 | 25 | 3 | 810.00 | 0 | 2013-01-01 | Salinas | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| 4 | 565 | 2013-01-01 | 25 | 4 | 0.00 | 0 | 2013-01-01 | Salinas | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2805226 | 3000883 | 2017-08-15 | 9 | 28 | 438.13 | 0 | 2017-08-15 | Quito | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 2805227 | 3000884 | 2017-08-15 | 9 | 29 | 154.55 | 1 | 2017-08-15 | Quito | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 2805228 | 3000885 | 2017-08-15 | 9 | 30 | 2419.73 | 148 | 2017-08-15 | Quito | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 2805229 | 3000886 | 2017-08-15 | 9 | 31 | 121.00 | 8 | 2017-08-15 | Quito | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 2805230 | 3000887 | 2017-08-15 | 9 | 32 | 16.00 | 0 | 2017-08-15 | Quito | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
2805231 rows × 35 columns
# One-hot encoding the City column
Encoding_city = pd.get_dummies(Train.city)
Encoding_city
| Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2805226 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805227 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805228 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805229 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805230 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
2805231 rows × 22 columns
# joining the encoded city to the Train set and dropping the city column
Train = Train.join(pd.get_dummies(Train.city)).drop(columns='city')
Train
| id | date_x | store_nbr | family | sales | onpromotion | Sales_date | state | type_x | cluster | dcoilwtico | type_y | locale | locale_name | transferred | sales_date | date_y | transactions | date | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | season | Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 561 | 2013-01-01 | 25 | 0 | 0.00 | 0 | 2013-01-01 | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 562 | 2013-01-01 | 25 | 1 | 0.00 | 0 | 2013-01-01 | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 563 | 2013-01-01 | 25 | 2 | 2.00 | 0 | 2013-01-01 | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 564 | 2013-01-01 | 25 | 3 | 810.00 | 0 | 2013-01-01 | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 565 | 2013-01-01 | 25 | 4 | 0.00 | 0 | 2013-01-01 | 13 | D | 1 | 93.14 | Holiday | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Winter | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2805226 | 3000883 | 2017-08-15 | 9 | 28 | 438.13 | 0 | 2017-08-15 | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805227 | 3000884 | 2017-08-15 | 9 | 29 | 154.55 | 1 | 2017-08-15 | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805228 | 3000885 | 2017-08-15 | 9 | 30 | 2419.73 | 148 | 2017-08-15 | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805229 | 3000886 | 2017-08-15 | 9 | 31 | 121.00 | 8 | 2017-08-15 | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805230 | 3000887 | 2017-08-15 | 9 | 32 | 16.00 | 0 | 2017-08-15 | 12 | B | 6 | 47.57 | Holiday | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
2805231 rows × 56 columns
# Dropping columns
Train.drop(columns = ['date_x', 'type_x', 'type_y', 'season'], inplace = True)
# finding the correlation
Train.corr()
| id | store_nbr | family | sales | onpromotion | state | cluster | dcoilwtico | locale | locale_name | transferred | transactions | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| id | 1.00 | 0.02 | 0.00 | 0.07 | 0.20 | -0.00 | -0.02 | -0.65 | -0.02 | -0.05 | 0.05 | -0.03 | 0.06 | 0.00 | 0.06 | 0.05 | 0.00 | 0.98 | 0.00 | 0.00 | 0.00 | 0.06 | 0.00 | 0.00 | 0.01 | 0.01 | -0.01 | -0.01 | -0.01 | 0.03 | -0.01 | -0.01 | -0.01 | -0.01 | 0.01 | -0.01 | -0.01 | 0.01 | -0.01 | -0.01 | 0.07 | -0.01 | 0.09 | -0.01 | -0.02 | -0.01 | -0.01 | 0.03 |
| store_nbr | 0.02 | 1.00 | 0.00 | 0.06 | 0.02 | -0.20 | -0.08 | -0.01 | -0.00 | -0.00 | 0.00 | 0.19 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.13 | 0.04 | -0.15 | 0.17 | 0.00 | 0.25 | 0.15 | -0.07 | 0.14 | -0.11 | -0.19 | 0.08 | 0.10 | 0.18 | 0.21 | 0.07 | -0.03 | 0.06 | -0.26 | -0.12 | -0.02 | -0.21 |
| family | 0.00 | 0.00 | 1.00 | -0.12 | -0.05 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 |
| sales | 0.07 | 0.06 | -0.12 | 1.00 | 0.43 | 0.07 | 0.04 | -0.07 | -0.00 | -0.02 | -0.00 | 0.21 | 0.02 | -0.01 | 0.02 | 0.02 | 0.04 | 0.07 | 0.05 | 0.01 | 0.00 | 0.02 | 0.01 | 0.00 | 0.01 | 0.01 | -0.00 | -0.01 | 0.02 | -0.00 | -0.01 | -0.02 | -0.01 | -0.02 | -0.03 | -0.02 | -0.03 | -0.01 | -0.01 | -0.02 | -0.01 | -0.03 | -0.02 | -0.02 | 0.12 | -0.02 | -0.02 | -0.02 |
| onpromotion | 0.20 | 0.02 | -0.05 | 0.43 | 1.00 | 0.01 | 0.00 | -0.12 | -0.00 | -0.02 | 0.02 | 0.03 | 0.02 | -0.00 | 0.03 | 0.02 | -0.00 | 0.20 | -0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.01 | -0.00 | -0.00 | -0.01 | -0.01 | -0.01 | -0.01 | -0.01 | 0.00 | -0.00 | -0.00 | 0.03 | -0.01 | 0.01 | -0.00 | 0.01 | -0.01 | -0.01 | 0.00 |
| state | -0.00 | -0.20 | -0.00 | 0.07 | 0.01 | 1.00 | 0.16 | 0.00 | -0.00 | -0.00 | 0.00 | 0.35 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | 0.31 | 0.01 | 0.11 | -0.48 | -0.10 | 0.04 | -0.13 | -0.27 | -0.28 | -0.06 | -0.28 | -0.09 | -0.03 | -0.24 | 0.04 | -0.10 | 0.05 | 0.01 | 0.57 | -0.23 | 0.14 | 0.29 |
| cluster | -0.02 | -0.08 | -0.00 | 0.04 | 0.00 | 0.16 | 1.00 | 0.01 | -0.00 | 0.00 | -0.00 | 0.20 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.13 | 0.04 | -0.08 | -0.23 | -0.23 | -0.17 | 0.04 | 0.20 | -0.10 | 0.20 | 0.28 | 0.04 | -0.14 | -0.22 | 0.11 | -0.17 | -0.03 | -0.17 | 0.38 | -0.05 | -0.22 | -0.22 |
| dcoilwtico | -0.65 | -0.01 | -0.00 | -0.07 | -0.12 | 0.00 | 0.01 | 1.00 | 0.01 | 0.05 | -0.01 | -0.04 | 0.01 | 0.00 | 0.00 | 0.01 | -0.30 | -0.65 | -0.38 | -0.01 | -0.00 | 0.01 | 0.03 | -0.00 | -0.01 | -0.01 | 0.01 | 0.01 | 0.01 | -0.02 | 0.01 | 0.01 | 0.01 | 0.01 | -0.01 | 0.01 | 0.01 | -0.00 | 0.01 | 0.01 | -0.05 | 0.01 | -0.06 | 0.01 | 0.02 | 0.00 | 0.01 | -0.02 |
| locale | -0.02 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.01 | 1.00 | 0.34 | -0.05 | -0.00 | -0.08 | 0.00 | -0.08 | -0.08 | 0.01 | -0.00 | 0.01 | 0.10 | 0.04 | -0.09 | 0.14 | 0.02 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 |
| locale_name | -0.05 | -0.00 | -0.00 | -0.02 | -0.02 | -0.00 | 0.00 | 0.05 | 0.34 | 1.00 | -0.18 | -0.05 | -0.10 | 0.02 | -0.10 | -0.09 | -0.01 | -0.03 | -0.02 | -0.07 | -0.01 | -0.11 | -0.09 | -0.04 | -0.13 | -0.13 | 0.00 | 0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.01 | 0.00 | -0.01 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 |
| transferred | 0.05 | 0.00 | 0.00 | -0.00 | 0.02 | 0.00 | -0.00 | -0.01 | -0.05 | -0.18 | 1.00 | -0.01 | 0.02 | -0.00 | 0.02 | 0.02 | -0.04 | 0.05 | -0.04 | -0.01 | -0.01 | 0.03 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | 0.00 | 0.00 |
| transactions | -0.03 | 0.19 | -0.00 | 0.21 | 0.03 | 0.35 | 0.20 | -0.04 | -0.00 | -0.05 | -0.01 | 1.00 | 0.04 | -0.02 | 0.04 | 0.03 | 0.10 | -0.04 | 0.13 | 0.02 | 0.01 | 0.03 | 0.01 | 0.01 | 0.04 | 0.04 | 0.03 | -0.05 | 0.10 | -0.07 | -0.03 | -0.12 | -0.06 | -0.06 | -0.16 | -0.06 | -0.13 | -0.08 | 0.00 | -0.11 | -0.07 | -0.15 | -0.09 | -0.10 | 0.56 | -0.04 | -0.11 | -0.13 |
| month | 0.06 | 0.00 | 0.00 | 0.02 | 0.02 | -0.00 | -0.00 | 0.01 | -0.08 | -0.10 | 0.02 | 0.04 | 1.00 | -0.00 | 1.00 | 0.98 | -0.01 | -0.15 | -0.00 | 0.02 | -0.00 | 0.97 | 0.01 | 0.03 | 0.08 | 0.08 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.01 | -0.00 | -0.00 | 0.01 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | 0.00 |
| day_of_month | 0.00 | -0.00 | 0.00 | -0.01 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 | 0.02 | -0.00 | -0.02 | -0.00 | 1.00 | 0.08 | 0.06 | 0.00 | -0.01 | 0.00 | -0.30 | 0.31 | -0.00 | -0.15 | 0.17 | 0.08 | 0.08 | -0.00 | -0.00 | 0.00 | 0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.00 | 0.00 |
| day_of_year | 0.06 | 0.00 | 0.00 | 0.02 | 0.03 | -0.00 | -0.00 | 0.00 | -0.08 | -0.10 | 0.02 | 0.04 | 1.00 | 0.08 | 1.00 | 0.98 | -0.01 | -0.15 | -0.00 | -0.00 | 0.02 | 0.97 | -0.00 | 0.04 | 0.09 | 0.09 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.01 | -0.00 | -0.00 | 0.01 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | 0.00 |
| week_of_year | 0.05 | 0.00 | 0.00 | 0.02 | 0.02 | -0.00 | -0.00 | 0.01 | -0.08 | -0.09 | 0.02 | 0.03 | 0.98 | 0.06 | 0.98 | 1.00 | -0.01 | -0.15 | -0.01 | -0.00 | 0.00 | 0.95 | 0.00 | 0.01 | 0.01 | 0.01 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.01 | -0.00 | -0.00 | 0.01 | -0.00 | 0.00 | -0.00 | -0.01 | -0.00 | -0.01 | 0.00 |
| day_of_week | 0.00 | 0.00 | 0.00 | 0.04 | -0.00 | -0.00 | -0.00 | -0.30 | 0.01 | -0.01 | -0.04 | 0.10 | -0.01 | 0.00 | -0.01 | -0.01 | 1.00 | 0.00 | 0.79 | -0.00 | 0.00 | -0.00 | -0.02 | -0.02 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 |
| year | 0.98 | 0.02 | 0.00 | 0.07 | 0.20 | -0.00 | -0.01 | -0.65 | -0.00 | -0.03 | 0.05 | -0.04 | -0.15 | -0.01 | -0.15 | -0.15 | 0.00 | 1.00 | 0.00 | 0.00 | -0.00 | -0.15 | 0.00 | -0.01 | -0.01 | -0.01 | -0.01 | -0.01 | -0.01 | 0.03 | -0.01 | -0.01 | -0.01 | -0.01 | 0.01 | -0.01 | -0.01 | 0.01 | -0.01 | -0.01 | 0.07 | -0.01 | 0.09 | -0.01 | -0.02 | -0.01 | -0.01 | 0.03 |
| is_weekend | 0.00 | 0.00 | 0.00 | 0.05 | -0.03 | -0.00 | -0.00 | -0.38 | 0.01 | -0.02 | -0.04 | 0.13 | -0.00 | 0.00 | -0.00 | -0.01 | 0.79 | 0.00 | 1.00 | 0.01 | 0.00 | -0.00 | -0.01 | -0.03 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 |
| is_month_start | 0.00 | 0.00 | -0.00 | 0.01 | 0.00 | 0.00 | -0.00 | -0.01 | 0.10 | -0.07 | -0.01 | 0.02 | 0.02 | -0.30 | -0.00 | -0.00 | -0.00 | 0.00 | 0.01 | 1.00 | -0.03 | 0.02 | 0.51 | -0.02 | -0.01 | -0.01 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 |
| is_month_end | 0.00 | 0.00 | -0.00 | 0.00 | 0.00 | -0.00 | -0.00 | -0.00 | 0.04 | -0.01 | -0.01 | 0.01 | -0.00 | 0.31 | 0.02 | 0.00 | 0.00 | -0.00 | 0.00 | -0.03 | 1.00 | -0.00 | -0.02 | 0.57 | 0.27 | 0.27 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 |
| quarter | 0.06 | 0.00 | 0.00 | 0.02 | 0.02 | -0.00 | -0.00 | 0.01 | -0.09 | -0.11 | 0.03 | 0.03 | 0.97 | -0.00 | 0.97 | 0.95 | -0.00 | -0.15 | -0.00 | 0.02 | -0.00 | 1.00 | 0.04 | -0.00 | 0.07 | 0.07 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | 0.01 | -0.00 | -0.00 | 0.01 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | 0.00 |
| is_quarter_start | 0.00 | 0.00 | -0.00 | 0.01 | 0.00 | 0.00 | -0.00 | 0.03 | 0.14 | -0.09 | -0.00 | 0.01 | 0.01 | -0.15 | -0.00 | 0.00 | -0.02 | 0.00 | -0.01 | 0.51 | -0.02 | 0.04 | 1.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | 0.00 | -0.00 |
| is_quarter_end | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | 0.00 | -0.00 | 0.02 | -0.04 | -0.01 | 0.01 | 0.03 | 0.17 | 0.04 | 0.01 | -0.02 | -0.01 | -0.03 | -0.02 | 0.57 | -0.00 | -0.01 | 1.00 | 0.47 | 0.47 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 |
| is_year_start | 0.01 | -0.00 | 0.00 | 0.01 | 0.00 | -0.00 | -0.00 | -0.01 | 0.01 | -0.13 | -0.00 | 0.04 | 0.08 | 0.08 | 0.09 | 0.01 | -0.00 | -0.01 | -0.00 | -0.01 | 0.27 | 0.07 | -0.00 | 0.47 | 1.00 | 1.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 |
| is_year_end | 0.01 | -0.00 | 0.00 | 0.01 | 0.00 | -0.00 | -0.00 | -0.01 | 0.01 | -0.13 | -0.00 | 0.04 | 0.08 | 0.08 | 0.09 | 0.01 | -0.00 | -0.01 | -0.00 | -0.01 | 0.27 | 0.07 | -0.00 | 0.47 | 1.00 | 1.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | 0.00 |
| Ambato | -0.01 | 0.13 | -0.00 | -0.00 | -0.00 | 0.31 | 0.13 | 0.01 | 0.00 | 0.00 | -0.00 | 0.03 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | 1.00 | -0.03 | -0.03 | -0.05 | -0.03 | -0.03 | -0.03 | -0.03 | -0.09 | -0.03 | -0.04 | -0.03 | -0.03 | -0.04 | -0.03 | -0.03 | -0.02 | -0.03 | -0.15 | -0.03 | -0.03 | -0.05 |
| Babahoyo | -0.01 | 0.04 | -0.00 | -0.01 | -0.00 | 0.01 | 0.04 | 0.01 | 0.00 | 0.00 | -0.00 | -0.05 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | 1.00 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Cayambe | -0.01 | -0.15 | -0.00 | 0.02 | 0.00 | 0.11 | -0.08 | 0.01 | 0.00 | 0.00 | -0.00 | 0.10 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | 1.00 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Cuenca | 0.03 | 0.17 | 0.00 | -0.00 | 0.01 | -0.48 | -0.23 | -0.02 | 0.00 | -0.00 | 0.00 | -0.07 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | -0.05 | -0.03 | -0.03 | 1.00 | -0.03 | -0.03 | -0.03 | -0.03 | -0.10 | -0.03 | -0.05 | -0.03 | -0.03 | -0.05 | -0.03 | -0.03 | -0.02 | -0.03 | -0.17 | -0.03 | -0.03 | -0.05 |
| Daule | -0.01 | 0.00 | -0.00 | -0.01 | -0.00 | -0.10 | -0.23 | 0.01 | 0.00 | 0.00 | -0.00 | -0.03 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | 1.00 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| El Carmen | -0.01 | 0.25 | -0.00 | -0.02 | -0.00 | 0.04 | -0.17 | 0.01 | 0.00 | 0.00 | -0.00 | -0.12 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.01 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | 1.00 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Esmeraldas | -0.01 | 0.15 | -0.00 | -0.01 | -0.01 | -0.13 | 0.04 | 0.01 | 0.00 | 0.00 | -0.00 | -0.06 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.01 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | 1.00 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Guaranda | -0.01 | -0.07 | -0.00 | -0.02 | -0.01 | -0.27 | 0.20 | 0.01 | 0.00 | 0.00 | -0.00 | -0.06 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | 1.00 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Guayaquil | 0.01 | 0.14 | 0.00 | -0.03 | -0.01 | -0.28 | -0.10 | -0.01 | 0.00 | 0.00 | -0.00 | -0.16 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.09 | -0.06 | -0.06 | -0.10 | -0.06 | -0.06 | -0.06 | -0.06 | 1.00 | -0.06 | -0.08 | -0.06 | -0.06 | -0.09 | -0.05 | -0.06 | -0.04 | -0.06 | -0.31 | -0.06 | -0.06 | -0.10 |
| Ibarra | -0.01 | -0.11 | -0.00 | -0.02 | -0.01 | -0.06 | 0.20 | 0.01 | 0.00 | 0.00 | -0.00 | -0.06 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | 1.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Latacunga | -0.01 | -0.19 | -0.00 | -0.03 | -0.01 | -0.28 | 0.28 | 0.01 | 0.00 | 0.00 | -0.00 | -0.13 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.01 | -0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | -0.04 | -0.03 | -0.03 | -0.05 | -0.03 | -0.03 | -0.03 | -0.03 | -0.08 | -0.03 | 1.00 | -0.03 | -0.03 | -0.04 | -0.03 | -0.03 | -0.02 | -0.03 | -0.15 | -0.03 | -0.03 | -0.05 |
| Libertad | 0.01 | 0.08 | -0.00 | -0.01 | 0.00 | -0.09 | 0.04 | -0.00 | -0.00 | -0.00 | 0.00 | -0.08 | 0.01 | 0.00 | 0.01 | 0.01 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | 1.00 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.10 | -0.02 | -0.02 | -0.03 |
| Loja | -0.01 | 0.10 | -0.00 | -0.01 | -0.00 | -0.03 | -0.14 | 0.01 | 0.00 | 0.00 | -0.00 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | 1.00 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Machala | -0.01 | 0.18 | -0.00 | -0.02 | -0.00 | -0.24 | -0.22 | 0.01 | 0.00 | 0.00 | -0.00 | -0.11 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.04 | -0.03 | -0.03 | -0.05 | -0.03 | -0.03 | -0.03 | -0.03 | -0.09 | -0.03 | -0.04 | -0.03 | -0.03 | 1.00 | -0.03 | -0.03 | -0.02 | -0.03 | -0.15 | -0.03 | -0.03 | -0.05 |
| Manta | 0.07 | 0.21 | 0.00 | -0.01 | 0.03 | 0.04 | 0.11 | -0.05 | -0.00 | -0.01 | 0.00 | -0.07 | 0.01 | 0.00 | 0.01 | 0.01 | 0.00 | 0.07 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.05 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | 1.00 | -0.02 | -0.01 | -0.02 | -0.09 | -0.02 | -0.02 | -0.03 |
| Playas | -0.01 | 0.07 | -0.00 | -0.03 | -0.01 | -0.10 | -0.17 | 0.01 | 0.00 | 0.00 | -0.00 | -0.15 | -0.00 | -0.00 | -0.00 | -0.00 | 0.00 | -0.01 | 0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | 1.00 | -0.01 | -0.02 | -0.11 | -0.02 | -0.02 | -0.03 |
| Puyo | 0.09 | -0.03 | 0.00 | -0.02 | 0.01 | 0.05 | -0.03 | -0.06 | -0.00 | -0.01 | 0.00 | -0.09 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.09 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | -0.02 | -0.01 | -0.01 | -0.02 | -0.01 | -0.01 | -0.01 | -0.01 | -0.04 | -0.01 | -0.02 | -0.01 | -0.01 | -0.02 | -0.01 | -0.01 | 1.00 | -0.01 | -0.07 | -0.01 | -0.01 | -0.02 |
| Quevedo | -0.01 | 0.06 | -0.00 | -0.02 | -0.00 | 0.01 | -0.17 | 0.01 | 0.00 | 0.00 | -0.00 | -0.10 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.01 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | 1.00 | -0.11 | -0.02 | -0.02 | -0.03 |
| Quito | -0.02 | -0.26 | -0.00 | 0.12 | 0.01 | 0.57 | 0.38 | 0.02 | 0.00 | 0.00 | -0.00 | 0.56 | -0.00 | 0.00 | -0.00 | -0.01 | -0.00 | -0.02 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.00 | -0.15 | -0.11 | -0.11 | -0.17 | -0.11 | -0.11 | -0.11 | -0.11 | -0.31 | -0.11 | -0.15 | -0.10 | -0.11 | -0.15 | -0.09 | -0.11 | -0.07 | -0.11 | 1.00 | -0.10 | -0.10 | -0.17 |
| Riobamba | -0.01 | -0.12 | -0.00 | -0.02 | -0.01 | -0.23 | -0.05 | 0.00 | 0.00 | 0.00 | -0.00 | -0.04 | -0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.01 | 0.00 | 0.00 | 0.00 | -0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.10 | 1.00 | -0.02 | -0.03 |
| Salinas | -0.01 | -0.02 | -0.00 | -0.02 | -0.01 | 0.14 | -0.22 | 0.01 | -0.00 | -0.00 | 0.00 | -0.11 | -0.01 | -0.00 | -0.01 | -0.01 | 0.00 | -0.01 | 0.00 | 0.00 | -0.00 | -0.01 | 0.00 | -0.00 | 0.00 | 0.00 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.02 | -0.02 | -0.06 | -0.02 | -0.03 | -0.02 | -0.02 | -0.03 | -0.02 | -0.02 | -0.01 | -0.02 | -0.10 | -0.02 | 1.00 | -0.03 |
| Santo Domingo | 0.03 | -0.21 | 0.00 | -0.02 | 0.00 | 0.29 | -0.22 | -0.02 | -0.00 | -0.00 | 0.00 | -0.13 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | 0.00 | -0.05 | -0.03 | -0.03 | -0.05 | -0.03 | -0.03 | -0.03 | -0.03 | -0.10 | -0.03 | -0.05 | -0.03 | -0.03 | -0.05 | -0.03 | -0.03 | -0.02 | -0.03 | -0.17 | -0.03 | -0.03 | 1.00 |
plt.figure(figsize = (14,14))
sns.heatmap(Train.corr(), vmin = -1 , cmap = 'YlGnBu')
<AxesSubplot:>
Train
| id | store_nbr | family | sales | onpromotion | Sales_date | state | cluster | dcoilwtico | locale | locale_name | transferred | sales_date | date_y | transactions | date | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 561 | 25 | 0 | 0.00 | 0 | 2013-01-01 | 13 | 1 | 93.14 | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 562 | 25 | 1 | 0.00 | 0 | 2013-01-01 | 13 | 1 | 93.14 | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 563 | 25 | 2 | 2.00 | 0 | 2013-01-01 | 13 | 1 | 93.14 | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 564 | 25 | 3 | 810.00 | 0 | 2013-01-01 | 13 | 1 | 93.14 | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 565 | 25 | 4 | 0.00 | 0 | 2013-01-01 | 13 | 1 | 93.14 | 1 | 4 | False | 2013-01-01 | 2013-01-01 | 770 | 2013-01-01 | 1 | 1 | 1 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2805226 | 3000883 | 9 | 28 | 438.13 | 0 | 2017-08-15 | 12 | 6 | 47.57 | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805227 | 3000884 | 9 | 29 | 154.55 | 1 | 2017-08-15 | 12 | 6 | 47.57 | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805228 | 3000885 | 9 | 30 | 2419.73 | 148 | 2017-08-15 | 12 | 6 | 47.57 | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805229 | 3000886 | 9 | 31 | 121.00 | 8 | 2017-08-15 | 12 | 6 | 47.57 | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2805230 | 3000887 | 9 | 32 | 16.00 | 0 | 2017-08-15 | 12 | 6 | 47.57 | 0 | 19 | False | 2017-08-15 | 2017-08-15 | 2155 | 2017-08-15 | 8 | 15 | 227 | 33 | 1 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
2805231 rows × 52 columns
#Train.drop(columns = ['Sales_date','sales_date', 'id','date_y','date','locale_name'], inplace = True)
#Train
Train.drop(columns = ['Sales_date','sales_date', 'id','date_y','date','locale_name', 'state','cluster', 'day_of_year','week_of_year', 'day_of_week', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'Ambato','El Carmen', 'Guaranda','Ibarra', 'Latacunga' ,'Loja', 'Puyo', 'Riobamba'], inplace = True)
Train
| store_nbr | family | sales | onpromotion | dcoilwtico | locale | transferred | transactions | month | day_of_month | year | is_weekend | is_month_start | is_month_end | quarter | Babahoyo | Cayambe | Cuenca | Daule | Esmeraldas | Guayaquil | Libertad | Machala | Manta | Playas | Quevedo | Quito | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 0 | 0.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 25 | 1 | 0.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 25 | 2 | 2.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 25 | 3 | 810.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 25 | 4 | 0.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2805226 | 9 | 28 | 438.13 | 0 | 47.57 | 0 | False | 2155 | 8 | 15 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2805227 | 9 | 29 | 154.55 | 1 | 47.57 | 0 | False | 2155 | 8 | 15 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2805228 | 9 | 30 | 2419.73 | 148 | 47.57 | 0 | False | 2155 | 8 | 15 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2805229 | 9 | 31 | 121.00 | 8 | 47.57 | 0 | False | 2155 | 8 | 15 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2805230 | 9 | 32 | 16.00 | 0 | 47.57 | 0 | False | 2155 | 8 | 15 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
2805231 rows × 29 columns
Train[Train["year"] <= 2016]
| store_nbr | family | sales | onpromotion | dcoilwtico | locale | transferred | transactions | month | day_of_month | year | is_weekend | is_month_start | is_month_end | quarter | Babahoyo | Cayambe | Cuenca | Daule | Esmeraldas | Guayaquil | Libertad | Machala | Manta | Playas | Quevedo | Quito | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 0 | 0.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 25 | 1 | 0.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 25 | 2 | 2.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 25 | 3 | 810.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 25 | 4 | 0.00 | 0 | 93.14 | 1 | False | 770 | 1 | 1 | 2013 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2398930 | 9 | 28 | 687.85 | 1 | 47.26 | 1 | False | 2998 | 12 | 31 | 2016 | 1 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2398931 | 9 | 29 | 100.41 | 1 | 47.26 | 1 | False | 2998 | 12 | 31 | 2016 | 1 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2398932 | 9 | 30 | 3091.36 | 3 | 47.26 | 1 | False | 2998 | 12 | 31 | 2016 | 1 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2398933 | 9 | 31 | 2.00 | 0 | 47.26 | 1 | False | 2998 | 12 | 31 | 2016 | 1 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2398934 | 9 | 32 | 13.00 | 2 | 47.26 | 1 | False | 2998 | 12 | 31 | 2016 | 1 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
2398935 rows × 29 columns
# Splitting the data into train and test with years
train = Train.loc[(Train['year'].isin([2013, 2014, 2015, 2016]) & Train['year'].isin([2013, 2014, 2015, 2016]))]
test = Train.loc[(Train['year'].isin([2017]) & Train['year'].isin([2017]))]
#test.year.unique(), test.month.unique()
test.year.unique()
array([2017])
train[train['year'] <= 2016].year.unique()
array([2013, 2014, 2015, 2016])
decision_tree_model = DecisionTreeRegressor(random_state = 0)
X_train = train.drop(columns = ['sales'])
Y_train = train['sales']
decision_tree_model.fit(X_train,Y_train)
DecisionTreeRegressor(random_state=0)
X_test = test.drop(columns = ['sales'])
Y_test = test['sales']
y_pred = decision_tree_model.predict(X_test)
mean_absolute_error(Y_test,y_pred)
100.3180164655628
np.sqrt(mean_squared_error(Y_test,y_pred))
489.0291273941447
y_pred
array([ 8. , 0. , 9. , ..., 2992.434, 46. , 17. ])
np.sqrt(mean_squared_log_error(Y_test, y_pred))
0.6730110298975375
## get importance
dt_importance = decision_tree_model.feature_importances_
dt_importance = pd.DataFrame(dt_importance, columns = ["score"]).reset_index()
dt_importance["Feature"] = list(X_train.columns)
dt_importance.drop(columns = ["index"], inplace = True)
dt_importance.sort_values(by = "score", ascending = False).head()
| score | Feature | |
|---|---|---|
| 1 | 0.44 | family |
| 6 | 0.21 | transactions |
| 2 | 0.20 | onpromotion |
| 0 | 0.04 | store_nbr |
| 9 | 0.04 | year |
linear_model = LinearRegression()
linear_model.fit(X_train,Y_train)
LinearRegression()
y_pred = linear_model.predict(X_test)
mean_absolute_error(Y_test,y_pred)
506.8056281030758
np.sqrt(mean_squared_error(Y_test,y_pred))
1088.9994047831015
#np.sqrt(mean_squared_log_error(Y_test, y_pred))
importance = linear_model.coef_
# summarize feature importance
for i,v in enumerate(importance):
print('Feature: %0d, Score: %.5f' % (i,v))
Feature: 0, Score: 0.98121 Feature: 1, Score: -12.67421 Feature: 2, Score: 35.26731 Feature: 3, Score: 0.24838 Feature: 4, Score: -1.30983 Feature: 5, Score: -34.91211 Feature: 6, Score: 0.21826 Feature: 7, Score: -4.95548 Feature: 8, Score: -1.16457 Feature: 9, Score: 11.36033 Feature: 10, Score: 100.38294 Feature: 11, Score: 33.66547 Feature: 12, Score: 16.46436 Feature: 13, Score: 15.78784 Feature: 14, Score: 50.23434 Feature: 15, Score: 26.51901 Feature: 16, Score: 56.47015 Feature: 17, Score: 50.02481 Feature: 18, Score: 43.41133 Feature: 19, Score: 36.80265 Feature: 20, Score: 68.61446 Feature: 21, Score: 63.57780 Feature: 22, Score: -38.77402 Feature: 23, Score: 45.71495 Feature: 24, Score: 68.49638 Feature: 25, Score: 59.41392 Feature: 26, Score: 63.78353 Feature: 27, Score: 54.53111
# getting feature importance
importance = pd.DataFrame(importance, columns = ["score"]).reset_index()
importance["Feature"] = list(X_train.columns)
importance.drop(columns = ["index"], inplace = True)
importance.sort_values(by = "score", ascending = False)
importance
| score | Feature | |
|---|---|---|
| 0 | 0.98 | store_nbr |
| 1 | -12.67 | family |
| 2 | 35.27 | onpromotion |
| 3 | 0.25 | dcoilwtico |
| 4 | -1.31 | locale |
| 5 | -34.91 | transferred |
| 6 | 0.22 | transactions |
| 7 | -4.96 | month |
| 8 | -1.16 | day_of_month |
| 9 | 11.36 | year |
| 10 | 100.38 | is_weekend |
| 11 | 33.67 | is_month_start |
| 12 | 16.46 | is_month_end |
| 13 | 15.79 | quarter |
| 14 | 50.23 | Babahoyo |
| 15 | 26.52 | Cayambe |
| 16 | 56.47 | Cuenca |
| 17 | 50.02 | Daule |
| 18 | 43.41 | Esmeraldas |
| 19 | 36.80 | Guayaquil |
| 20 | 68.61 | Libertad |
| 21 | 63.58 | Machala |
| 22 | -38.77 | Manta |
| 23 | 45.71 | Playas |
| 24 | 68.50 | Quevedo |
| 25 | 59.41 | Quito |
| 26 | 63.78 | Salinas |
| 27 | 54.53 | Santo Domingo |
fig = px.bar(importance, x = "Feature", y = "score")
fig.show()
# Evaluate Linear model RMSE
scores = cross_val_score(linear_model, X_train, Y_train, scoring = "neg_mean_squared_error", cv = 5, n_jobs = 1)
rmse = np.sqrt(-scores)
print("RMSE values: ", np.round(rmse, 2))
print("RMSE average: ", np.mean(rmse))
RMSE values: [ 738.66 931.09 1016.22 1137.29 1103.76] RMSE average: 985.4054742351915
#np.sqrt(mean_squared_log_error(Y_test, y_pred))
rf = RandomForestRegressor(n_estimators = 200, max_features = 'sqrt', max_depth = 5, random_state = 18).fit(X_train, Y_train)
rf
RandomForestRegressor(max_depth=5, max_features='sqrt', n_estimators=200,
random_state=18)
y_pred = rf.predict(X_test)
mean_absolute_error(Y_test,y_pred)
482.70102418957697
np.sqrt(mean_squared_error(Y_test,y_pred))
1035.5631914855212
np.sqrt(mean_squared_log_error(Y_test, y_pred))
3.0288017106346996
# Viewing the test dataset
test_set
| id | date | store_nbr | family | onpromotion | |
|---|---|---|---|---|---|
| 0 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 |
| 1 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 |
| 2 | 3000890 | 2017-08-16 | 1 | BEAUTY | 2 |
| 3 | 3000891 | 2017-08-16 | 1 | BEVERAGES | 20 |
| 4 | 3000892 | 2017-08-16 | 1 | BOOKS | 0 |
| ... | ... | ... | ... | ... | ... |
| 28507 | 3029395 | 2017-08-31 | 9 | POULTRY | 1 |
| 28508 | 3029396 | 2017-08-31 | 9 | PREPARED FOODS | 0 |
| 28509 | 3029397 | 2017-08-31 | 9 | PRODUCE | 1 |
| 28510 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 |
| 28511 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 |
28512 rows × 5 columns
# Checking if there are any missing dates
test_set_range = test_set.date.min(), test_set.date.max()
test_set_range
('2017-08-16', '2017-08-31')
# Number of expected dates
expected_test_days = pd.date_range(start = test_set["date"].min(), end = test_set["date"].max())
expected_test_days
DatetimeIndex(['2017-08-16', '2017-08-17', '2017-08-18', '2017-08-19',
'2017-08-20', '2017-08-21', '2017-08-22', '2017-08-23',
'2017-08-24', '2017-08-25', '2017-08-26', '2017-08-27',
'2017-08-28', '2017-08-29', '2017-08-30', '2017-08-31'],
dtype='datetime64[ns]', freq='D')
#Previewing holiday data
holidays
| Sales_date | type | locale | locale_name | transferred | sales_date | |
|---|---|---|---|---|---|---|
| 0 | 2014-07-24 | Additional | Local | Guayaquil | False | 2014-07-24 |
| 1 | 2016-09-27 | NaN | NaN | NaN | NaN | NaN |
| 2 | 2013-05-10 | NaN | NaN | NaN | NaN | NaN |
| 3 | 2015-06-17 | NaN | NaN | NaN | NaN | NaN |
| 4 | 2015-10-22 | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... |
| 1713 | 2016-12-26 | Additional | National | Ecuador | False | 2016-12-26 |
| 1714 | 2015-02-24 | NaN | NaN | NaN | NaN | NaN |
| 1715 | 2013-01-07 | NaN | NaN | NaN | NaN | NaN |
| 1716 | 2013-11-17 | NaN | NaN | NaN | NaN | NaN |
| 1717 | 2014-02-24 | NaN | NaN | NaN | NaN | NaN |
1718 rows × 6 columns
# Getting missing dates
holidays.rename(columns = {"sales_date":"date"}, inplace = True)
# Finding missing holiday dates
missing_holiday_dates = set(expected_test_days.date) - set(holidays["date"].unique())
missing_holiday_dates
{datetime.date(2017, 8, 16),
datetime.date(2017, 8, 17),
datetime.date(2017, 8, 18),
datetime.date(2017, 8, 19),
datetime.date(2017, 8, 20),
datetime.date(2017, 8, 21),
datetime.date(2017, 8, 22),
datetime.date(2017, 8, 23),
datetime.date(2017, 8, 24),
datetime.date(2017, 8, 25),
datetime.date(2017, 8, 26),
datetime.date(2017, 8, 27),
datetime.date(2017, 8, 28),
datetime.date(2017, 8, 29),
datetime.date(2017, 8, 30),
datetime.date(2017, 8, 31)}
# Creating a dataframe for the missing dates in the holiday data
holidays_addition = pd.DataFrame(missing_holiday_dates, columns = ["date"])
holidays_addition
| date | |
|---|---|
| 0 | 2017-08-24 |
| 1 | 2017-08-18 |
| 2 | 2017-08-16 |
| 3 | 2017-08-27 |
| 4 | 2017-08-22 |
| 5 | 2017-08-29 |
| 6 | 2017-08-28 |
| 7 | 2017-08-31 |
| 8 | 2017-08-19 |
| 9 | 2017-08-26 |
| 10 | 2017-08-21 |
| 11 | 2017-08-30 |
| 12 | 2017-08-20 |
| 13 | 2017-08-25 |
| 14 | 2017-08-23 |
| 15 | 2017-08-17 |
# Adding the missing holiday dates to the main dataframe
holidays = pd.concat([holidays, holidays_addition], ignore_index=True)
holidays["date"] = pd.to_datetime(holidays["date"]).dt.date
holidays
| Sales_date | type | locale | locale_name | transferred | date | |
|---|---|---|---|---|---|---|
| 0 | 2014-07-24 | Additional | Local | Guayaquil | False | 2014-07-24 |
| 1 | 2016-09-27 | NaN | NaN | NaN | NaN | NaT |
| 2 | 2013-05-10 | NaN | NaN | NaN | NaN | NaT |
| 3 | 2015-06-17 | NaN | NaN | NaN | NaN | NaT |
| 4 | 2015-10-22 | NaN | NaN | NaN | NaN | NaT |
| ... | ... | ... | ... | ... | ... | ... |
| 1729 | NaN | NaN | NaN | NaN | NaN | 2017-08-30 |
| 1730 | NaN | NaN | NaN | NaN | NaN | 2017-08-20 |
| 1731 | NaN | NaN | NaN | NaN | NaN | 2017-08-25 |
| 1732 | NaN | NaN | NaN | NaN | NaN | 2017-08-23 |
| 1733 | NaN | NaN | NaN | NaN | NaN | 2017-08-17 |
1734 rows × 6 columns
# Filling in missing values with variables
holidays["type"] = holidays["type"].fillna("Work Day")
holidays["locale"] = holidays["locale"].fillna("National")
holidays["locale_name"] = holidays["locale_name"].fillna("Ecuador")
holidays["transferred"] = holidays["transferred"].fillna(False)
# Merging test set with stores
Test = pd.merge(test_set, stores)
Test
| id | date | store_nbr | family | onpromotion | city | state | type | cluster | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | D | 13 |
| 1 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 | Quito | Pichincha | D | 13 |
| 2 | 3000890 | 2017-08-16 | 1 | BEAUTY | 2 | Quito | Pichincha | D | 13 |
| 3 | 3000891 | 2017-08-16 | 1 | BEVERAGES | 20 | Quito | Pichincha | D | 13 |
| 4 | 3000892 | 2017-08-16 | 1 | BOOKS | 0 | Quito | Pichincha | D | 13 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28507 | 3029395 | 2017-08-31 | 9 | POULTRY | 1 | Quito | Pichincha | B | 6 |
| 28508 | 3029396 | 2017-08-31 | 9 | PREPARED FOODS | 0 | Quito | Pichincha | B | 6 |
| 28509 | 3029397 | 2017-08-31 | 9 | PRODUCE | 1 | Quito | Pichincha | B | 6 |
| 28510 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | B | 6 |
| 28511 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 | Quito | Pichincha | B | 6 |
28512 rows × 9 columns
# Previewing oil dataset
oil
| date | dcoilwtico | Sales_date | |
|---|---|---|---|
| 0 | 2013-01-01 | 93.14 | 2013-01-01 |
| 1 | 2013-01-02 | 93.14 | 2013-01-02 |
| 2 | 2013-01-03 | 92.97 | 2013-01-03 |
| 3 | 2013-01-04 | 93.12 | 2013-01-04 |
| 4 | 2013-01-07 | 93.20 | 2013-01-07 |
| ... | ... | ... | ... |
| 1213 | 2017-08-25 | 47.65 | 2017-08-25 |
| 1214 | 2017-08-28 | 46.40 | 2017-08-28 |
| 1215 | 2017-08-29 | 46.46 | 2017-08-29 |
| 1216 | 2017-08-30 | 45.96 | 2017-08-30 |
| 1217 | 2017-08-31 | 47.26 | 2017-08-31 |
1218 rows × 3 columns
# Getting missing dates
missing_oil_dates = set(expected_test_days.date) - set(oil["date"].unique())
missing_oil_dates
{datetime.date(2017, 8, 16),
datetime.date(2017, 8, 17),
datetime.date(2017, 8, 18),
datetime.date(2017, 8, 19),
datetime.date(2017, 8, 20),
datetime.date(2017, 8, 21),
datetime.date(2017, 8, 22),
datetime.date(2017, 8, 23),
datetime.date(2017, 8, 24),
datetime.date(2017, 8, 25),
datetime.date(2017, 8, 26),
datetime.date(2017, 8, 27),
datetime.date(2017, 8, 28),
datetime.date(2017, 8, 29),
datetime.date(2017, 8, 30),
datetime.date(2017, 8, 31)}
# Adding the missing oil dates to the main dataframe
oil_dates_add = pd.DataFrame(missing_oil_dates, columns = ["date"])
oil = pd.concat([oil, oil_dates_add], ignore_index=True)
oil["Sales_date"] = pd.to_datetime(oil["date"])
oil = oil.sort_values(by = ["Sales_date"], ignore_index = True)
oil.head()
| date | dcoilwtico | Sales_date | |
|---|---|---|---|
| 0 | 2013-01-01 | 93.14 | 2013-01-01 |
| 1 | 2013-01-02 | 93.14 | 2013-01-02 |
| 2 | 2013-01-03 | 92.97 | 2013-01-03 |
| 3 | 2013-01-04 | 93.12 | 2013-01-04 |
| 4 | 2013-01-07 | 93.20 | 2013-01-07 |
# Filling nulls with forward fill and backfill
oil = oil.ffill().bfill()
oil
| date | dcoilwtico | Sales_date | |
|---|---|---|---|
| 0 | 2013-01-01 | 93.14 | 2013-01-01 |
| 1 | 2013-01-02 | 93.14 | 2013-01-02 |
| 2 | 2013-01-03 | 92.97 | 2013-01-03 |
| 3 | 2013-01-04 | 93.12 | 2013-01-04 |
| 4 | 2013-01-07 | 93.20 | 2013-01-07 |
| ... | ... | ... | ... |
| 1229 | 2017-08-29 | 46.46 | 2017-08-29 |
| 1230 | 2017-08-30 | 46.46 | 2017-08-30 |
| 1231 | 2017-08-30 | 45.96 | 2017-08-30 |
| 1232 | 2017-08-31 | 47.26 | 2017-08-31 |
| 1233 | 2017-08-31 | 47.26 | 2017-08-31 |
1234 rows × 3 columns
Test
| id | date | store_nbr | family | onpromotion | city | state | type | cluster | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | D | 13 |
| 1 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 | Quito | Pichincha | D | 13 |
| 2 | 3000890 | 2017-08-16 | 1 | BEAUTY | 2 | Quito | Pichincha | D | 13 |
| 3 | 3000891 | 2017-08-16 | 1 | BEVERAGES | 20 | Quito | Pichincha | D | 13 |
| 4 | 3000892 | 2017-08-16 | 1 | BOOKS | 0 | Quito | Pichincha | D | 13 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28507 | 3029395 | 2017-08-31 | 9 | POULTRY | 1 | Quito | Pichincha | B | 6 |
| 28508 | 3029396 | 2017-08-31 | 9 | PREPARED FOODS | 0 | Quito | Pichincha | B | 6 |
| 28509 | 3029397 | 2017-08-31 | 9 | PRODUCE | 1 | Quito | Pichincha | B | 6 |
| 28510 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | B | 6 |
| 28511 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 | Quito | Pichincha | B | 6 |
28512 rows × 9 columns
Test["Sales_date"] = pd.to_datetime(Test["date"])
# Merging the train data with the other dataframes
Test = pd.merge(Test, oil, on = 'Sales_date')
Test
| id | date_x | store_nbr | family | onpromotion | city | state | type | cluster | Sales_date | date_y | dcoilwtico | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 |
| 1 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 |
| 2 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 |
| 3 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 |
| 4 | 3000890 | 2017-08-16 | 1 | BEAUTY | 2 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 3029397 | 2017-08-31 | 9 | PRODUCE | 1 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 |
| 49892 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 |
| 49893 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 |
| 49894 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 |
| 49895 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 |
49896 rows × 12 columns
holidays['Sales_date'] = pd.to_datetime(holidays["date"])
# Merging the Test and holidays dfs
Test = pd.merge(Test, holidays, on = "Sales_date")
Test
| id | date_x | store_nbr | family | onpromotion | city | state | type_x | cluster | Sales_date | date_y | dcoilwtico | type_y | locale | locale_name | transferred | date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False | 2017-08-16 |
| 1 | 3000888 | 2017-08-16 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False | 2017-08-16 |
| 2 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False | 2017-08-16 |
| 3 | 3000889 | 2017-08-16 | 1 | BABY CARE | 0 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False | 2017-08-16 |
| 4 | 3000890 | 2017-08-16 | 1 | BEAUTY | 2 | Quito | Pichincha | D | 13 | 2017-08-16 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False | 2017-08-16 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 3029397 | 2017-08-31 | 9 | PRODUCE | 1 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False | 2017-08-31 |
| 49892 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False | 2017-08-31 |
| 49893 | 3029398 | 2017-08-31 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False | 2017-08-31 |
| 49894 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False | 2017-08-31 |
| 49895 | 3029399 | 2017-08-31 | 9 | SEAFOOD | 0 | Quito | Pichincha | B | 6 | 2017-08-31 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False | 2017-08-31 |
49896 rows × 17 columns
# Dropping columns
Test.drop(columns = ['id', 'date_x','type_x', 'date_y', 'date'], inplace = True)
Test
| store_nbr | family | onpromotion | city | state | cluster | Sales_date | dcoilwtico | type_y | locale | locale_name | transferred | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False |
| 1 | 1 | AUTOMOTIVE | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False |
| 2 | 1 | BABY CARE | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False |
| 3 | 1 | BABY CARE | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False |
| 4 | 1 | BEAUTY | 2 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | National | Ecuador | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 9 | PRODUCE | 1 | Quito | Pichincha | 6 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False |
| 49892 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | 6 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False |
| 49893 | 9 | SCHOOL AND OFFICE SUPPLIES | 9 | Quito | Pichincha | 6 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False |
| 49894 | 9 | SEAFOOD | 0 | Quito | Pichincha | 6 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False |
| 49895 | 9 | SEAFOOD | 0 | Quito | Pichincha | 6 | 2017-08-31 | 47.26 | Work Day | National | Ecuador | False |
49896 rows × 12 columns
#Loading & applying the Label Encoder to the transferred column
le = LabelEncoder()
Test['family'] = le.fit_transform(Test.family)
Test['transferred'] = le.fit_transform(Test.transferred)
Test['locale'] = le.fit_transform(Test.locale)
Test['locale_name'] = le.fit_transform(Test.locale_name)
Test.head()
| store_nbr | family | onpromotion | city | state | cluster | Sales_date | dcoilwtico | type_y | locale | locale_name | transferred | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | 0 | 0 | 0 |
| 1 | 1 | 0 | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | 0 | 0 | 0 |
| 2 | 1 | 1 | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | 0 | 0 | 0 |
| 3 | 1 | 1 | 0 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | 0 | 0 | 0 |
| 4 | 1 | 2 | 2 | Quito | Pichincha | 13 | 2017-08-16 | 46.80 | Work Day | 0 | 0 | 0 |
# Dropping the state and type_y columns
Test.drop(columns = ['state', 'type_y'], inplace = True)
Test
| store_nbr | family | onpromotion | city | cluster | Sales_date | dcoilwtico | locale | locale_name | transferred | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | Quito | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 |
| 1 | 1 | 0 | 0 | Quito | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 |
| 2 | 1 | 1 | 0 | Quito | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 |
| 3 | 1 | 1 | 0 | Quito | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 |
| 4 | 1 | 2 | 2 | Quito | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 9 | 30 | 1 | Quito | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 |
| 49892 | 9 | 31 | 9 | Quito | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 |
| 49893 | 9 | 31 | 9 | Quito | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 |
| 49894 | 9 | 32 | 0 | Quito | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 |
| 49895 | 9 | 32 | 0 | Quito | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 |
49896 rows × 10 columns
# One-hot encoding the city column
City_encoding = pd.get_dummies(Test.city)
City_encoding
| Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49892 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49893 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49894 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49895 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
49896 rows × 22 columns
# Joining the encoded city to the Test dataframe and dropping the city column
Test = Test.join(pd.get_dummies(Test.city)).drop(columns='city')
Test
| store_nbr | family | onpromotion | cluster | Sales_date | dcoilwtico | locale | locale_name | transferred | Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1 | 1 | 0 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2 | 1 | 1 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 3 | 1 | 1 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 4 | 1 | 2 | 2 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 9 | 30 | 1 | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49892 | 9 | 31 | 9 | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49893 | 9 | 31 | 9 | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49894 | 9 | 32 | 0 | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 49895 | 9 | 32 | 0 | 6 | 2017-08-31 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
49896 rows × 31 columns
# Defining a function to get date features from dataframe
def getDateFeatures(df, date):
df['date'] = pd.to_datetime(df[date])
df['month'] = df['date'].dt.month
df['day_of_month'] = df['date'].dt.day
df['day_of_year'] = df['date'].dt.dayofyear
df['week_of_year'] = df['date'].dt.isocalendar().week
df['day_of_week'] = df['date'].dt.dayofweek
df['year'] = df['date'].dt.year
df['is_weekend'] = np.where(df['day_of_week'] > 4, 1, 0)
df['is_month_start'] = df['date'].dt.is_month_start.astype(int)
df['is_month_end'] = df['date'].dt.is_month_end.astype(int)
df['quarter'] = df['date'].dt.quarter
df['is_quarter_start'] = df['date'].dt.is_quarter_start.astype(int)
df['is_quarter_end'] = df['date'].dt.is_quarter_end.astype(int)
df['is_year_start'] = df['date'].dt.is_year_end.astype(int)
df['is_year_end'] = df['date'].dt.is_year_end.astype(int)
df['season'] = df['month'].apply(getSeason)
return df
# Getting date features matched with Sales
Test = getDateFeatures(Test, 'Sales_date')
pd.set_option('display.max_columns', None)
# Viewing the Test dataframe
Test.head()
| store_nbr | family | onpromotion | cluster | Sales_date | dcoilwtico | locale | locale_name | transferred | Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | date | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 1 | 1 | 0 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 2 | 1 | 1 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 3 | 1 | 1 | 0 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 4 | 1 | 2 | 2 | 13 | 2017-08-16 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
# Dropping the date column in the final Test dataframe
Test.drop(columns = ['Sales_date' ], inplace= True )
Test
| store_nbr | family | onpromotion | cluster | dcoilwtico | locale | locale_name | transferred | Ambato | Babahoyo | Cayambe | Cuenca | Daule | El Carmen | Esmeraldas | Guaranda | Guayaquil | Ibarra | Latacunga | Libertad | Loja | Machala | Manta | Playas | Puyo | Quevedo | Quito | Riobamba | Salinas | Santo Domingo | date | month | day_of_month | day_of_year | week_of_year | day_of_week | year | is_weekend | is_month_start | is_month_end | quarter | is_quarter_start | is_quarter_end | is_year_start | is_year_end | season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 13 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 1 | 1 | 0 | 0 | 13 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 2 | 1 | 1 | 0 | 13 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 3 | 1 | 1 | 0 | 13 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| 4 | 1 | 2 | 2 | 13 | 46.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-16 | 8 | 16 | 228 | 33 | 2 | 2017 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | Summer |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49891 | 9 | 30 | 1 | 6 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-31 | 8 | 31 | 243 | 35 | 3 | 2017 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | Summer |
| 49892 | 9 | 31 | 9 | 6 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-31 | 8 | 31 | 243 | 35 | 3 | 2017 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | Summer |
| 49893 | 9 | 31 | 9 | 6 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-31 | 8 | 31 | 243 | 35 | 3 | 2017 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | Summer |
| 49894 | 9 | 32 | 0 | 6 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-31 | 8 | 31 | 243 | 35 | 3 | 2017 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | Summer |
| 49895 | 9 | 32 | 0 | 6 | 47.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2017-08-31 | 8 | 31 | 243 | 35 | 3 | 2017 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | Summer |
49896 rows × 46 columns
X_predict = Test[["store_nbr", "family", "onpromotion", "locale", "dcoilwtico", "day_of_week",
"day_of_month","is_month_start","is_month_end","day_of_year", "is_weekend",
"week_of_year", "month", "year","quarter","Babahoyo","Cayambe", "Cuenca","Daule", "Esmeraldas" ,"Guayaquil",
"Libertad", "Machala","Playas", "Quevedo", "Quito", "Salinas","Santo Domingo"]]
Y_test = test['sales']
# predictions
predictions = decision_tree_model.predict(X_predict)
# predictions
predictions = pd.DataFrame(predictions)
# renaming columns in predictions
predictions.rename(columns = {0:"sales"}, inplace = True)
predictions
| sales | |
|---|---|
| 0 | 1.00 |
| 1 | 1.00 |
| 2 | 0.00 |
| 3 | 0.00 |
| 4 | 2.00 |
| ... | ... |
| 49891 | 0.00 |
| 49892 | 25.00 |
| 49893 | 25.00 |
| 49894 | 36.77 |
| 49895 | 36.77 |
49896 rows × 1 columns
# Loading sample_submission csv
sample_submission = pd.read_csv("/Users/Admin/Desktop/store-sales-time-series-forecasting/sample_submission.csv")
sample_submission["sales"] = predictions["sales"]
sample_submission
| id | sales | |
|---|---|---|
| 0 | 3000888 | 1.00 |
| 1 | 3000889 | 1.00 |
| 2 | 3000890 | 0.00 |
| 3 | 3000891 | 0.00 |
| 4 | 3000892 | 2.00 |
| ... | ... | ... |
| 28507 | 3029395 | 193.30 |
| 28508 | 3029396 | 25.00 |
| 28509 | 3029397 | 25.00 |
| 28510 | 3029398 | 36.77 |
| 28511 | 3029399 | 36.77 |
28512 rows × 2 columns
sample_submission.to_csv("sample_submission.csv")